diff --git a/config/script/apps-generator.py b/config/script/apps-generator.py
new file mode 100644
index 0000000..a514ede
--- /dev/null
+++ b/config/script/apps-generator.py
@@ -0,0 +1,94 @@
+"""Generate nginx server blocks for every restaurant that has a domain.
+
+Reads the ``restaurant`` collection of the ``IG`` MongoDB database and writes
+one HTTPS reverse-proxy ``server`` block per document carrying a ``domain``
+to ``apps.conf`` in the current working directory.
+"""
+import os
+import re
+import sys
+
+import pymongo
+
+# NOTE(review): this credential is committed to the repository. Rotate it and
+# pass the URI through MONGODB_URI so this literal fallback can be removed.
+DEFAULT_MONGODB_URI = (
+    "mongodb://IG:jdZwyec3Yb0yaBr8BPJoup1lfovAbGT342I2pX8wFqwCeLGvhXBLiL4vmhM"
+    "@mongoprimary:27017/IG?authSource=IG"
+)
+OUTPUT_PATH = "apps.conf"
+
+# Whitespace or any of these characters would end or alter an nginx directive
+# once the domain is substituted into the template below.
+UNSAFE_DOMAIN_CHARS = re.compile(r"""[\s;{}"'#$\\]""")
+
+SERVER_BLOCK_TEMPLATE = """server {
+    server_name @DOMAIN@;
+
+    access_log /var/log/nginx/ig/access.@DOMAIN@.log;
+    location / {
+        proxy_set_header Host $host;
+        proxy_set_header X-Real-IP $remote_addr;
+        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+        proxy_set_header X-Forwarded-Proto $scheme;
+        proxy_pass http://127.0.0.1:8012;
+        proxy_read_timeout 90;
+        proxy_redirect off;
+        proxy_http_version 1.1;
+    }
+
+    listen 443 ssl http2; # managed by Certbot
+    ssl_certificate /etc/letsencrypt/live/demo.igarson.app/fullchain.pem; # managed by Certbot
+    ssl_certificate_key /etc/letsencrypt/live/demo.igarson.app/privkey.pem; # managed by Certbot
+    include /etc/letsencrypt/options-ssl-nginx.conf; # managed by Certbot
+    ssl_dhparam /etc/letsencrypt/ssl-dhparams.pem; # managed by Certbot
+}
+
+"""
+
+
+def is_safe_domain(domain):
+    """Return True when *domain* can be placed into the nginx template as is."""
+    return (
+        isinstance(domain, str)
+        and bool(domain)
+        and UNSAFE_DOMAIN_CHARS.search(domain) is None
+    )
+
+
+def render_server_block(domain):
+    """Return the nginx ``server`` block that proxies *domain* to the app."""
+    return SERVER_BLOCK_TEMPLATE.replace("@DOMAIN@", domain)
+
+
+def build_conf(restaurants):
+    """Render one server block per restaurant document with a usable domain."""
+    blocks = []
+    for restaurant in restaurants:
+        if "domain" not in restaurant:
+            continue
+        domain = restaurant["domain"]
+        if not is_safe_domain(domain):
+            # One malformed value would otherwise break or hijack the whole file.
+            print(
+                f"skipping {restaurant['_id']}: unsafe domain {domain!r}",
+                file=sys.stderr,
+            )
+            continue
+        print(restaurant["_id"])
+        blocks.append(render_server_block(domain))
+    return "".join(blocks)
+
+
+def main():
+    """Query MongoDB and write the generated configuration to OUTPUT_PATH."""
+    uri = os.environ.get("MONGODB_URI", DEFAULT_MONGODB_URI)
+    with pymongo.MongoClient(uri) as client:
+        restaurants = client.get_database("IG").get_collection("restaurant")
+        conf = build_conf(restaurants.find())
+    with open(OUTPUT_PATH, "w", encoding="utf-8") as f:
+        f.write(conf)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/config/sites-available/apps.conf b/config/sites-available/apps.conf
new file mode 100644
index 0000000..d2ac388
--- /dev/null
+++ b/config/sites-available/apps.conf
@@ -0,0 +1,61 @@
+server{
+    server_name demo.igarson.app;
+    access_log /var/log/nginx/ig/access.demo.log;
+    location / {
+        proxy_set_header Host $host;
+        proxy_set_header X-Real-IP $remote_addr;
+        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+        proxy_set_header X-Forwarded-Proto $scheme;
+        proxy_pass http://127.0.0.1:8012;
+        proxy_read_timeout 90;
+        proxy_redirect off;
+        proxy_http_version 1.1;
+    }
+    listen 443 ssl http2; # managed by Certbot
+    ssl_certificate /etc/letsencrypt/live/demo.igarson.app/fullchain.pem; # managed by Certbot
+    ssl_certificate_key /etc/letsencrypt/live/demo.igarson.app/privkey.pem; # managed by Certbot
+    include /etc/letsencrypt/options-ssl-nginx.conf; # managed by Certbot
+    ssl_dhparam /etc/letsencrypt/ssl-dhparams.pem; # managed by Certbot
+
+}
+
+server{
+    server_name terrace.kermanshah.igarson.app;
+    access_log /var/log/nginx/ig/access.demo.log;
+    location / {
+        proxy_set_header Host $host;
+        proxy_set_header X-Real-IP $remote_addr;
+        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+        proxy_set_header X-Forwarded-Proto $scheme;
+        proxy_pass http://127.0.0.1:8012;
+        proxy_read_timeout 90;
+        proxy_redirect off;
+        proxy_http_version 1.1;
+    }
+    listen 443 ssl http2; # managed by Certbot
+    ssl_certificate /etc/letsencrypt/live/demo.igarson.app/fullchain.pem; # managed by Certbot
+    ssl_certificate_key /etc/letsencrypt/live/demo.igarson.app/privkey.pem; # managed by Certbot
+    include /etc/letsencrypt/options-ssl-nginx.conf; # managed by Certbot
+    ssl_dhparam /etc/letsencrypt/ssl-dhparams.pem; # managed by Certbot
+
+}
+
+
+
+server{
+    if ($host = demo.igarson.app) {
+        return 301 https://$host$request_uri;
+    } # managed by Certbot
+    server_name demo.igarson.app;
+    listen 80;
+    return 404; # managed by Certbot
+}
+
+server{
+    if ($host = terrace.kermanshah.igarson.app) {
+        return 301 https://$host$request_uri;
+    } # managed by Certbot
+    server_name terrace.kermanshah.igarson.app;
+    listen 80;
+    return 404; # managed by Certbot
+}
\ No newline at end of file
diff --git a/config/sites-available/base.conf b/config/sites-available/base.conf
new file mode 100644 index 0000000..86453b4 --- /dev/null +++ b/config/sites-available/base.conf @@ -0,0 +1,245 @@ +server{ + server_name igarson.app www.igarson.app; + access_log /var/log/nginx/ig/access.www.log; + error_log /var/log/nginx/ig/error.www.log; + error_page 500 502 503 504 https://igarson.app/500; + error_page 401 403 https://igarson.app/500; + error_page 404 https://igarson.app/404; + + location / { + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + proxy_pass http://127.0.0.1:8014; + proxy_read_timeout 90; + proxy_redirect off; + } + + location /warehouse/windows { + auth_request /api/v1/update/windows/validate; + root /data; + autoindex on; + } + + location = /api/v1/update/windows/validate { + internal; + proxy_set_header X-Original-URI $request_uri; + proxy_pass_request_body off; + proxy_pass http://127.0.0.1:8011; + } + + + listen 443 ssl http2; # managed by Certbot + ssl_certificate /etc/letsencrypt/live/demo.igarson.app/fullchain.pem; # managed by Certbot + ssl_certificate_key /etc/letsencrypt/live/demo.igarson.app/privkey.pem; # managed by Certbot + include /etc/letsencrypt/options-ssl-nginx.conf; # managed by Certbot + ssl_dhparam /etc/letsencrypt/ssl-dhparams.pem; # managed by Certbot + +} + +server{ + server_name api.igarson.app; + access_log /var/log/nginx/ig/access.api.log; + error_log /var/log/nginx/ig/error.api.log; + client_max_body_size 20M; + location / { + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + proxy_pass http://127.0.0.1:8011; + proxy_read_timeout 90; + } + + location /IG { + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + 
proxy_pass http://127.0.0.1:8011; + proxy_read_timeout 90; + proxy_http_version 1.1; + proxy_set_header Upgrade "websocket"; + proxy_set_header Connection "upgrade"; + } + listen 443 ssl http2; # managed by Certbot + ssl_certificate /etc/letsencrypt/live/capitan.igarson.app/fullchain.pem; # managed by Certbot + ssl_certificate_key /etc/letsencrypt/live/capitan.igarson.app/privkey.pem; # managed by Certbot + include /etc/letsencrypt/options-ssl-nginx.conf; # managed by Certbot + ssl_dhparam /etc/letsencrypt/ssl-dhparams.pem; # managed by Certbot + +} + +server { + server_name storage.igarson.app; + access_log /var/log/nginx/ig/access.storage.log; + error_log /var/log/nginx/ig/error.storage.log; + + + location ~ ^/public/.*\.(ico|jpg|jpeg|gif|png|svg|json)$ { + root /volume/ig/api/storage; + add_header Cache-Control public; + add_header Cache-Control must-revalidate; + add_header 'Access-Control-Allow-Origin' '*' always; + add_header 'Access-Control-Allow-Methods' 'GET, POST, OPTIONS' always; + add_header 'Access-Control-Allow-Headers' 'DNT,User-Agent,X-Requested-With,If-Modified-Since,Cache-Control,Content-Type,Range' always; + add_header 'Access-Control-Expose-Headers' 'Content-Length,Content-Range' always; + autoindex on; + + } + + location ~ .*\.(ico|jpg|jpeg|gif|png|svg|json)$ { + root /volume/ig/ls/wwwroot; + add_header Cache-Control "public, max-age=86400"; + add_header Surrogate-Control "public, max-age=86400"; + add_header Cache-Control must-revalidate; + add_header 'Access-Control-Allow-Origin' '*' always; + add_header 'Access-Control-Allow-Methods' 'GET, POST, OPTIONS' always; + add_header 'Access-Control-Allow-Headers' 'DNT,User-Agent,X-Requested-With,If-Modified-Since,Cache-Control,Content-Type,Range' always; + add_header 'Access-Control-Expose-Headers' 'Content-Length,Content-Range' always; + autoindex on; + + + } + listen 443 ssl http2; # managed by Certbot + ssl_certificate /etc/letsencrypt/live/capitan.igarson.app/fullchain.pem; # managed by Certbot 
+ ssl_certificate_key /etc/letsencrypt/live/capitan.igarson.app/privkey.pem; # managed by Certbot + include /etc/letsencrypt/options-ssl-nginx.conf; # managed by Certbot + ssl_dhparam /etc/letsencrypt/ssl-dhparams.pem; # managed by Certbot + +} + +server{ + server_name ls.igarson.app; + access_log /var/log/nginx/ig/access.ls.log; + error_log /var/log/nginx/ig/error.ls.log; + client_max_body_size 20M; + location / { + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + proxy_pass http://127.0.0.1:4501; + proxy_read_timeout 90; + proxy_http_version 1.1; + proxy_set_header Upgrade $http_upgrade; + proxy_set_header Connection "upgrade"; + } + listen 443 ssl http2; # managed by Certbot + ssl_certificate /etc/letsencrypt/live/capitan.igarson.app/fullchain.pem; # managed by Certbot + ssl_certificate_key /etc/letsencrypt/live/capitan.igarson.app/privkey.pem; # managed by Certbot + include /etc/letsencrypt/options-ssl-nginx.conf; # managed by Certbot + ssl_dhparam /etc/letsencrypt/ssl-dhparams.pem; # managed by Certbot + +} + +server{ + server_name capitan.igarson.app; + access_log /var/log/nginx/ig/access.capitan.log; + + location / { + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + proxy_pass http://127.0.0.1:8013; + proxy_read_timeout 90; + } + listen 443 ssl http2; # managed by Certbot + ssl_certificate /etc/letsencrypt/live/demo.igarson.app/fullchain.pem; # managed by Certbot + ssl_certificate_key /etc/letsencrypt/live/demo.igarson.app/privkey.pem; # managed by Certbot + include /etc/letsencrypt/options-ssl-nginx.conf; # managed by Certbot + ssl_dhparam /etc/letsencrypt/ssl-dhparams.pem; # managed by Certbot + + +} + +server{ + server_name guardians.of.galaxy.igarson.app; + access_log 
/var/log/nginx/ig/access.guardians.of.galaxy.log; + error_log /var/log/nginx/ig/error.guardians.of.galaxy.log; + client_max_body_size 10M; + location / { + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + proxy_pass http://127.0.0.1:6732; + proxy_read_timeout 90; + proxy_redirect off; + } + listen 443 ssl http2; # managed by Certbot + ssl_certificate /etc/letsencrypt/live/capitan.igarson.app/fullchain.pem; # managed by Certbot + ssl_certificate_key /etc/letsencrypt/live/capitan.igarson.app/privkey.pem; # managed by Certbot + include /etc/letsencrypt/options-ssl-nginx.conf; # managed by Certbot + ssl_dhparam /etc/letsencrypt/ssl-dhparams.pem; # managed by Certbot + +} + + + + +# redirects +server{ + if ($host = www.igarson.app) { + return 301 https://$host$request_uri; + } # managed by Certbot + + + if ($host = igarson.app) { + return 301 https://$host$request_uri; + } # managed by Certbot + server_name igarson.app www.igarson.app; + listen 80; + return 404; # managed by Certbot + + +} + +server{ + if ($host = guardians.of.galaxy.igarson.app) { + return 301 https://$host$request_uri; + } # managed by Certbot + server_name guardians.of.galaxy.igarson.app; + listen 80; + return 404; # managed by Certbot +} + +server{ + if ($host = capitan.igarson.app) { + return 301 https://$host$request_uri; + } # managed by Certbot + server_name capitan.igarson.app; + listen 80; + return 404; # managed by Certbot +} + +server{ + if ($host = ls.igarson.app) { + return 301 https://$host$request_uri; + } # managed by Certbot + server_name ls.igarson.app; + listen 80; + return 404; # managed by Certbot +} + +server{ + if ($host = storage.igarson.app) { + return 301 https://$host$request_uri; + } # managed by Certbot + server_name storage.igarson.app; + listen 80; + return 404; # managed by Certbot +} + +server{ + if ($host = api.igarson.app) { + return 
301 https://$host$request_uri; + } # managed by Certbot + server_name api.igarson.app; + listen 80; + return 404; # managed by Certbot +} + + diff --git a/config/sites-available/games.conf b/config/sites-available/games.conf new file mode 100644 index 0000000..2aa3ef4 --- /dev/null +++ b/config/sites-available/games.conf @@ -0,0 +1,194 @@ +server { + server_name mafia.game.igarson.app; + access_log /var/log/nginx/ig/access.game.mafia.log; + error_log /var/log/nginx/ig/error.game.mafia.log; + + location / { + proxy_set_header X-Forwarded-Host $host; + proxy_set_header X-Forwarded-Server $host; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Real-IP $remote_addr; + proxy_pass http://127.0.0.1:6380; + proxy_read_timeout 90; + proxy_redirect off; + + } + listen 443 ssl http2; # managed by Certbot + ssl_certificate /etc/letsencrypt/live/demo.igarson.app/fullchain.pem; # managed by Certbot + ssl_certificate_key /etc/letsencrypt/live/demo.igarson.app/privkey.pem; # managed by Certbot + include /etc/letsencrypt/options-ssl-nginx.conf; # managed by Certbot + ssl_dhparam /etc/letsencrypt/ssl-dhparams.pem; # managed by Certbot + +} + +server { + server_name api.mafia.game.igarson.app; + access_log /var/log/nginx/ig/access.game.api.mafia.log; + error_log /var/log/nginx/ig/error.game.api.mafia.log; + client_max_body_size 10M; + + location / { + proxy_set_header X-Forwarded-Host $host; + proxy_set_header X-Forwarded-Server $host; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Real-IP $remote_addr; + proxy_pass http://127.0.0.1:6300; + proxy_read_timeout 90; + proxy_redirect off; + proxy_http_version 1.1; + proxy_set_header Upgrade $http_upgrade; + proxy_set_header Connection "upgrade"; + + } + listen 443 ssl http2; # managed by Certbot + ssl_certificate /etc/letsencrypt/live/demo.igarson.app/fullchain.pem; # managed by Certbot + ssl_certificate_key /etc/letsencrypt/live/demo.igarson.app/privkey.pem; # 
managed by Certbot + include /etc/letsencrypt/options-ssl-nginx.conf; # managed by Certbot + ssl_dhparam /etc/letsencrypt/ssl-dhparams.pem; # managed by Certbot + +} + +server { + server_name 2048.game.igarson.app; + access_log /var/log/nginx/ig/access.game.2048.log; + + + location / { + proxy_set_header X-Forwarded-Host $host; + proxy_set_header X-Forwarded-Server $host; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Real-IP $remote_addr; + proxy_pass http://127.0.0.1:6533; + proxy_read_timeout 90; + proxy_redirect off; + + } + listen 443 ssl http2; # managed by Certbot + ssl_certificate /etc/letsencrypt/live/demo.igarson.app/fullchain.pem; # managed by Certbot + ssl_certificate_key /etc/letsencrypt/live/demo.igarson.app/privkey.pem; # managed by Certbot + include /etc/letsencrypt/options-ssl-nginx.conf; # managed by Certbot + ssl_dhparam /etc/letsencrypt/ssl-dhparams.pem; # managed by Certbot + +} + + + + +server{ + server_name sudoku.game.igarson.app; + access_log /var/log/nginx/ig/access.game.sudoku.log; + location / { + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + proxy_pass http://127.0.0.1:6532; + proxy_read_timeout 90; + proxy_redirect off; + } + listen 443 ssl http2; # managed by Certbot + ssl_certificate /etc/letsencrypt/live/igarson.app/fullchain.pem; # managed by Certbot + ssl_certificate_key /etc/letsencrypt/live/igarson.app/privkey.pem; # managed by Certbot + include /etc/letsencrypt/options-ssl-nginx.conf; # managed by Certbot + ssl_dhparam /etc/letsencrypt/ssl-dhparams.pem; # managed by Certbot +} + +server{ + server_name quiz.game.igarson.app; + access_log /var/log/nginx/ig/access.game.quiz.log; + error_log /var/log/nginx/ig/error.game.quiz.log; + location / { + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For 
$proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + proxy_pass http://127.0.0.1:8097; + proxy_read_timeout 90; + } + + + listen 443 ssl http2; # managed by Certbot + ssl_certificate /etc/letsencrypt/live/demo.igarson.app/fullchain.pem; # managed by Certbot + ssl_certificate_key /etc/letsencrypt/live/demo.igarson.app/privkey.pem; # managed by Certbot + include /etc/letsencrypt/options-ssl-nginx.conf; # managed by Certbot + ssl_dhparam /etc/letsencrypt/ssl-dhparams.pem; # managed by Certbot + + + +} +server{ + server_name api.quiz.game.igarson.app; + access_log /var/log/nginx/ig/access.game.api.quiz.log; + error_log /var/log/nginx/ig/error.game.api.quiz.log; + + location / { + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + proxy_pass http://127.0.0.1:8096; + proxy_read_timeout 90; + } + listen 443 ssl http2; # managed by Certbot + ssl_certificate /etc/letsencrypt/live/demo.igarson.app/fullchain.pem; # managed by Certbot + ssl_certificate_key /etc/letsencrypt/live/demo.igarson.app/privkey.pem; # managed by Certbot + include /etc/letsencrypt/options-ssl-nginx.conf; # managed by Certbot + ssl_dhparam /etc/letsencrypt/ssl-dhparams.pem; # managed by Certbot + + +} + + + +# redirects + + +server{ + if ($host = mafia.game.igarson.app) { + return 301 https://$host$request_uri; + } # managed by Certbot + server_name mafia.game.igarson.app; + listen 80; + return 404; # managed by Certbot +} + +server{ + if ($host = 2048.game.igarson.app) { + return 301 https://$host$request_uri; + } # managed by Certbot + server_name 2048.game.igarson.app; + listen 80; + return 404; # managed by Certbot +} +server{ + if ($host = api.mafia.game.igarson.app) { + return 301 https://$host$request_uri; + } # managed by Certbot + server_name api.mafia.game.igarson.app; + listen 80; + return 404; # managed by Certbot +} +server{ + if ($host = 
sudoku.game.igarson.app) { + return 301 https://$host$request_uri; + } # managed by Certbot + server_name sudoku.game.igarson.app; + listen 80; + return 404; # managed by Certbot +} +server{ + if ($host = quiz.game.igarson.app) { + return 301 https://$host$request_uri; + } # managed by Certbot + server_name quiz.game.igarson.app; + listen 80; + return 404; # managed by Certbot +} +server{ + if ($host = api.quiz.game.igarson.app) { + return 301 https://$host$request_uri; + } # managed by Certbot + server_name api.quiz.game.igarson.app; + listen 80; + return 404; # managed by Certbot +} \ No newline at end of file diff --git a/config/sites-available/igarson.conf b/config/sites-available/igarson.conf new file mode 100644 index 0000000..5bfa610 --- /dev/null +++ b/config/sites-available/igarson.conf @@ -0,0 +1,3 @@ +include /etc/nginx/sites-available/base.conf; +include /etc/nginx/sites-available/apps.conf; +include /etc/nginx/sites-available/games.conf; \ No newline at end of file diff --git a/passwords b/passwords new file mode 100644 index 0000000..f5fdc20 --- /dev/null +++ b/passwords @@ -0,0 +1,25 @@ +#mongo +MONGODB_IG_PASSWORD = jdZwyec3Yb0yaBr8BP+Joup1l/fovAbGT342I2pX8w+X+FqwCeLGvhXBLiL4vmhM +MONGODB_ADVERTISED_HOSTNAME=mongoprimary +MONGODB_REPLICA_SET_MODE=primary +MONGODB_ROOT_PASSWORD=cWv1WQvWEp+LPdax9We/M6PeT7KgyTE9zKmC5y1ieqz4JtpLiVifJrQg0VHYhE6l +MONGODB_REPLICA_SET_KEY=gp39MND7udY6bt9V3h9u+dhxToop4WZTZ2Umn810snSYurlSIPqs/oy0YfjftsBz +#pg +POSTGRESQL_USERNAME=igarsonAgent +POSTGRESQL_PASSWORD=xHTpBf4wC+bBeNg2pL6Ga7VEWKFJx7VPEUpqxwPFfOc2YYTVwFQuHfsiqoVeT9+6 +POSTGRESQL_DATABASE=igarsonDB +POSTGRESQL_REPLICATION_MODE=master +POSTGRESQL_REPLICATION_USER=pgreplicator +POSTGRESQL_REPLICATION_PASSWORD=ciXz6xmnEMZSO+0T8L6mGcFJrAvPzkTC04oh/WYIRi51gMQLPfW8tTEHALX6fhk4 +#rabbit +RABBITMQ_PASSWORD=muyXH/zymcTYLzk3wYnIwG+UJWECKy0ViUDO+UlCmPF3XS+2kliuV0TaA0mWf6lT +RABBITMQ_USERNAME=igarsonAgent +#redis +REDIS_REPLICATION_MODE=slave +REDIS_MASTER_HOST=redismaster 
+REDIS_MASTER_PORT_NUMBER=6379 +REDIS_MASTER_PASSWORD=Mf7VXwbaJQuMRd1sETVl79A7URUTyiEFwctGk3RhD+q74DmKDzc2SztYEZ2YynLZI8xuq7b9gNz2e7g2MNfU9sP8q+bEK9CqAu3y2Zu4xkVyzyYKdFMT696IB/SS1M7rvZKw7NBYT99lGCpj1edk+6hRpK/Qy2pfiVo29VKEgSXnkeJaM9nMJQ3wxN/1gLJd/uTGGf85nZWhUTEIdK58lJPSXLU55VqcaksahJHlg2oz5P5FNXWRPCnSC7obJ4OtkuHNw/P8REEL0KDyZ9khilU/XvzSgzp+v5i9hmUdKO3B8r+Y4keUZyaKz2VxCHSGAuqWSCpe+do1qQ== +REDIS_PASSWORD=gpGS7RezaKsWRH+qga95z6JTTxvecrpFShRR9IXqWZO2o5Kf8YAoNvGni9Zh5GGIM0Oz+e9+Cph9XDjfs3phgauiFtsEDkAf/1dxR8CixIzddcXKXgaawiqfwB8WCuM/2k8Iw7cv2wqoIdWrhdHbn96N//Su57Ri4hy5CRbUAl7VmGd9EL1sGytOJmXzQOi3lJBWvfVWbYBmgALwVQSads6g+OHmP2wpXsTsYMG1thppJVGo7i5Rh515hAuAGWTed4Ayqoe1muRR1L1Rs8pdA7IH/u4kOL1758Idd3BfTTRkr7xfjXY40dM0BkUqL5E4rOga38ThaVC99g== +#monitor +ADMIN_USER=nerdguy +ADMIN_PASSWORD=RbBmIu1LCgIqKcMmopc1Ie21+1OKsX1ktNCAeiViNIb6HAKkzjJ+GhZlLysItwgm diff --git a/playbooks/server/addDomainAndEncrypt.yml b/playbooks/server/addDomainAndEncrypt.yml new file mode 100644 index 0000000..3458219 --- /dev/null +++ b/playbooks/server/addDomainAndEncrypt.yml @@ -0,0 +1,32 @@ +--- +- name: letsencrypt + hosts: supermaster + become: yes + vars_files: + - var/domains.yml + tasks: + # - name: add domain to nginx with j2 block + # template: + # src: template/nginx-server-block.j2 + # dest: /tmp/new-nginx-server-block.{{item.server_name}}.conf + # with_items: + # - "{{ newDomains }}" + + # - name: append /tmp/new-nginx-server-block.conf to specific conf + # shell: | + # cat /tmp/new-nginx-server-block.{{item.server_name}}.conf >> /etc/nginx/sites-available/{{ item.conf_file }} + # rm -rf /tmp/new-nginx-server-block.{{item.server_name}}.conf + # with_items: + # - "{{ newDomains }}" + + - name: encrypt all domains + expect: + command: certbot --nginx -d "{{ item.server_name }}" + responses: + Question: + - 2 + - 2 + with_items: + - "{{ newDomains }}" + + \ No newline at end of file diff --git a/playbooks/initialServer/build.yml b/playbooks/server/build.yml similarity index 91% rename from 
playbooks/initialServer/build.yml rename to playbooks/server/build.yml index c901ad8..27787a3 100644 --- a/playbooks/initialServer/build.yml +++ b/playbooks/server/build.yml @@ -156,6 +156,13 @@ shell: reboot ignore_errors: yes + + - name: add enviroments to all servers + shell: | + echo "export ADMIN_USER=nerdguy" >> /etc/environment + echo "export ADMIN_PASSWORD=RbBmIu1LCgIqKcMmopc1Ie21+1OKsX1ktNCAeiViNIb6HAKkzjJ+GhZlLysItwgm" >> /etc/environment + + diff --git a/playbooks/server/env.yml b/playbooks/server/env.yml new file mode 100644 index 0000000..d676cc2 --- /dev/null +++ b/playbooks/server/env.yml @@ -0,0 +1,18 @@ +--- +- name: env modifier + hosts: all + become: yes + tasks: + - name: flush /etc/enviroments + shell: | + > /etc/environment + - name: add enviroments to all servers + shell: | + export {{item}} + echo {{item}} >> /etc/environment + with_items: + - PATH="/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/usr/local/games:/snap/bin" + - DOMAIN=igarson.app + - ADMIN_USER=nerdguy + - ADMIN_PASSWORD=RbBmIu1LCgIqKcMmopc1Ie21+1OKsX1ktNCAeiViNIb6HAKkzjJ+GhZlLysItwgm + \ No newline at end of file diff --git a/playbooks/initialServer/hosts b/playbooks/server/hosts similarity index 100% rename from playbooks/initialServer/hosts rename to playbooks/server/hosts diff --git a/playbooks/initialServer/letsencrypt.yml b/playbooks/server/letsencrypt.yml similarity index 100% rename from playbooks/initialServer/letsencrypt.yml rename to playbooks/server/letsencrypt.yml diff --git a/playbooks/initialServer/masters.yml b/playbooks/server/masters.yml similarity index 100% rename from playbooks/initialServer/masters.yml rename to playbooks/server/masters.yml diff --git a/playbooks/server/nginx.yml b/playbooks/server/nginx.yml new file mode 100644 index 0000000..9e9ce45 --- /dev/null +++ b/playbooks/server/nginx.yml @@ -0,0 +1,19 @@ +--- +- name: sync new apps to nginx + hosts: supermaster + become: yes + tasks: + - name: copy script + copy: + src: 
../../config/script/apps-generator.py + dest: /tmp/apps-generator.py + owner: root + group: root + mode: u=rw,g=r,o=r + # sh -c keeps the "&&" chain inside the container; -w /tmp makes the script's relative apps.conf land on the mounted file; no -it because Ansible has no TTY + - name: run script ans sync apps.conf + shell: docker run --rm -w /tmp -v /tmp/apps-generator.py:/apps-generator.py -v /etc/nginx/sites-available/apps.conf:/tmp/apps.conf --network mother registry.vnfco.ir/library/python:3.10.1-alpine3.15 sh -c "pip3 install pymongo && python3 /apps-generator.py" + + + + \ No newline at end of file diff --git a/playbooks/initialServer/reboot.yml b/playbooks/server/reboot.yml similarity index 100% rename from playbooks/initialServer/reboot.yml rename to playbooks/server/reboot.yml diff --git a/playbooks/initialServer/supermaster.yml b/playbooks/server/supermaster.yml similarity index 82% rename from playbooks/initialServer/supermaster.yml rename to playbooks/server/supermaster.yml index 172a8fe..a9669d2 100644 --- a/playbooks/initialServer/supermaster.yml +++ b/playbooks/server/supermaster.yml @@ -41,6 +41,17 @@ mkdir -p /volume/dns mkdir -p /volume/nginx mkdir -p /volume/portainer + mkdir -p /volume/mongo/primary + mkdir -p /data/warehouse/windows + mkdir -p /var/log/nginx/ig/ + chown -R 1001:1001 /volume/mongo/primary + chown -R 1001:1001 /volume/pg + chown -R 1001:1001 /volume/redis + chown -R 1001:1001 /volume/rabbit + chmod -R 755 /data/warehouse/windows + + + - name: create nfs export shell: | diff --git a/playbooks/server/sync-rawnginx.yml b/playbooks/server/sync-rawnginx.yml new file mode 100644 index 0000000..0b61bf7 --- /dev/null +++ b/playbooks/server/sync-rawnginx.yml @@ -0,0 +1,19 @@ +--- +- name: sync all conf files for nginx + hosts: supermaster + become: yes + tasks: + - name: copy files + copy: + src: ../../config/sites-available + dest: /etc/nginx/ + owner: root + group: root + mode: '0644' + + + - name: create link from sites-available to sites-enabled + shell: | + rm -rf /etc/nginx/sites-enabled/* + ln -s /etc/nginx/sites-available/* /etc/nginx/sites-enabled/ + \ No newline at end of file diff --git 
a/playbooks/initialServer/sync-stacks.yml b/playbooks/server/sync-stacks.yml similarity index 100% rename from playbooks/initialServer/sync-stacks.yml rename to playbooks/server/sync-stacks.yml diff --git a/playbooks/server/template/nginx-server-block.j2 b/playbooks/server/template/nginx-server-block.j2 new file mode 100644 index 0000000..bbb8347 --- /dev/null +++ b/playbooks/server/template/nginx-server-block.j2 @@ -0,0 +1,17 @@ +server{ + server_name {{ item.server_name }}; + access_log /var/log/nginx/ig/access.{{item.server_name}}.log; + {{ item.server_config }} + location / { + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + proxy_pass http://127.0.0.1:{{ item.service_port}}; + proxy_read_timeout 90; + proxy_redirect off; + proxy_http_version 1.1; + } + listen 80; + +} \ No newline at end of file diff --git a/playbooks/server/var/domains.yml b/playbooks/server/var/domains.yml new file mode 100644 index 0000000..c0a5c3f --- /dev/null +++ b/playbooks/server/var/domains.yml @@ -0,0 +1,19 @@ +--- +domains: + - www.igarson.app + - igarson.app + - api.igarson.app + - ls.igarson.app + - storage.igarson.app + - guardians.of.galaxy.igarson.app + - capitan.igarson.app + - mafia.game.igarson.app + - api.mafia.game.igarson.app + - quiz.game.igarson.app + - api.quiz.game.igarson.app + - sudoku.game.igarson.app + - 2048.game.igarson.app + - demo.igarson.app + - terrace.kermanshah.igarson.app + - champions.of.galaxy.igarson.app + diff --git a/playbooks/initialServer/var/hosts.yml b/playbooks/server/var/hosts.yml similarity index 100% rename from playbooks/initialServer/var/hosts.yml rename to playbooks/server/var/hosts.yml diff --git a/playbooks/initialServer/var/registry.yml b/playbooks/server/var/registry.yml similarity index 100% rename from playbooks/initialServer/var/registry.yml rename to playbooks/server/var/registry.yml diff --git 
a/playbooks/initialServer/var/swarm.yml b/playbooks/server/var/swarm.yml similarity index 100% rename from playbooks/initialServer/var/swarm.yml rename to playbooks/server/var/swarm.yml diff --git a/playbooks/initialServer/workers.yml b/playbooks/server/workers.yml similarity index 100% rename from playbooks/initialServer/workers.yml rename to playbooks/server/workers.yml diff --git a/playbooks/swarm/caddy/Caddyfile b/playbooks/swarm/caddy/Caddyfile new file mode 100644 index 0000000..e1b46ec --- /dev/null +++ b/playbooks/swarm/caddy/Caddyfile @@ -0,0 +1,40 @@ +:9090 { + basicauth / {$ADMIN_USER} {$ADMIN_PASSWORD} + proxy / prometheus:9090 { + transparent + } + + errors stderr + tls off +} + +:9093 { + basicauth / {$ADMIN_USER} {$ADMIN_PASSWORD} + proxy / alertmanager:9093 { + transparent + } + + errors stderr + tls off +} + +:9094 { + basicauth / {$ADMIN_USER} {$ADMIN_PASSWORD} + proxy / unsee:8080 { + transparent + } + + errors stderr + tls off +} + +:3000 { + proxy / grafana:3000 { + transparent + websocket + } + + errors stderr + tls off +} + diff --git a/playbooks/swarm/dockerd-exporter/Caddyfile b/playbooks/swarm/dockerd-exporter/Caddyfile new file mode 100644 index 0000000..5036d47 --- /dev/null +++ b/playbooks/swarm/dockerd-exporter/Caddyfile @@ -0,0 +1,8 @@ +:9323 { + proxy / {$DOCKER_GWBRIDGE_IP}:9323 { + transparent + } + + errors stderr + tls off +} diff --git a/playbooks/swarm/prometheus/Dockerfile b/playbooks/swarm/prometheus/Dockerfile new file mode 100644 index 0000000..cb31a08 --- /dev/null +++ b/playbooks/swarm/prometheus/Dockerfile @@ -0,0 +1,10 @@ +FROM prom/prometheus:v2.5.0 +# https://hub.docker.com/r/prom/prometheus/tags/ + +ENV WEAVE_TOKEN=none + +COPY conf /etc/prometheus/ + +ENTRYPOINT [ "/etc/prometheus/docker-entrypoint.sh" ] +CMD [ "--config.file=/etc/prometheus/prometheus.yml", \ + "--storage.tsdb.path=/prometheus" ] diff --git a/playbooks/swarm/prometheus/conf/docker-entrypoint.sh b/playbooks/swarm/prometheus/conf/docker-entrypoint.sh 
new file mode 100755 index 0000000..3acd9f3 --- /dev/null +++ b/playbooks/swarm/prometheus/conf/docker-entrypoint.sh @@ -0,0 +1,48 @@ +#!/bin/sh -e + +cat /etc/prometheus/prometheus.yml > /tmp/prometheus.yml +cat /etc/prometheus/weave-cortex.yml | \ + sed "s@#password: #@password: '$WEAVE_TOKEN'@g" > /tmp/weave-cortex.yml + +#JOBS=mongo-exporter:9111 redis-exporter:9112 + +if [ ${JOBS+x} ]; then + +for job in $JOBS +do +echo "adding job $job" + +SERVICE=$(echo "$job" | cut -d":" -f1) +PORT=$(echo "$job" | cut -d":" -f2) + +cat >>/tmp/prometheus.yml <>/tmp/weave-cortex.yml <# + +global: + scrape_interval: 15s + evaluation_interval: 15s + + external_labels: + monitor: 'promswarm' + +scrape_configs: + - job_name: 'prometheus' + static_configs: + - targets: ['localhost:9090'] + + - job_name: 'dockerd-exporter' + dns_sd_configs: + - names: + - 'tasks.dockerd-exporter' + type: 'A' + port: 9323 + + - job_name: 'cadvisor' + dns_sd_configs: + - names: + - 'tasks.cadvisor' + type: 'A' + port: 8080 + + - job_name: 'node-exporter' + dns_sd_configs: + - names: + - 'tasks.node-exporter' + type: 'A' + port: 9100 diff --git a/playbooks/swarm/prometheus/rules/swarm_node.rules.yml b/playbooks/swarm/prometheus/rules/swarm_node.rules.yml new file mode 100644 index 0000000..5b0eaaf --- /dev/null +++ b/playbooks/swarm/prometheus/rules/swarm_node.rules.yml @@ -0,0 +1,44 @@ +groups: +- name: /1/store/projects/vagrant/docker-swarm-vagrant/apps/swarmprom/prometheus/rules/swarm_node.rules.yml + rules: + - alert: node_cpu_usage + expr: 100 - (avg(irate(node_cpu_seconds_total{mode="idle"}[1m]) * ON(instance) GROUP_LEFT(node_name) + node_meta * 100) BY (node_name)) > 50 + for: 1m + labels: + severity: warning + annotations: + description: Swarm node {{ $labels.node_name }} CPU usage is at {{ humanize + $value}}%. 
+ summary: CPU alert for Swarm node '{{ $labels.node_name }}' + - alert: node_memory_usage + expr: sum(((node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) / node_memory_MemTotal_bytes) + * ON(instance) GROUP_LEFT(node_name) node_meta * 100) BY (node_name) > 80 + for: 1m + labels: + severity: warning + annotations: + description: Swarm node {{ $labels.node_name }} memory usage is at {{ humanize + $value}}%. + summary: Memory alert for Swarm node '{{ $labels.node_name }}' + - alert: node_disk_usage + expr: ((node_filesystem_size_bytes{mountpoint="/rootfs"} - node_filesystem_free_bytes{mountpoint="/rootfs"}) + * 100 / node_filesystem_size_bytes{mountpoint="/rootfs"}) * ON(instance) GROUP_LEFT(node_name) + node_meta > 85 + for: 1m + labels: + severity: warning + annotations: + description: Swarm node {{ $labels.node_name }} disk usage is at {{ humanize + $value}}%. + summary: Disk alert for Swarm node '{{ $labels.node_name }}' + - alert: node_disk_fill_rate_6h + expr: predict_linear(node_filesystem_free_bytes{mountpoint="/rootfs"}[1h], 6 * 3600) * ON(instance) + GROUP_LEFT(node_name) node_meta < 0 + for: 1h + labels: + severity: critical + annotations: + description: Swarm node {{ $labels.node_name }} disk is going to fill up in + 6h. 
+ summary: Disk fill alert for Swarm node '{{ $labels.node_name }}' diff --git a/playbooks/swarm/prometheus/rules/swarm_task.rules.yml b/playbooks/swarm/prometheus/rules/swarm_task.rules.yml new file mode 100644 index 0000000..db9aa7e --- /dev/null +++ b/playbooks/swarm/prometheus/rules/swarm_task.rules.yml @@ -0,0 +1,24 @@ +groups: +- name: /1/store/projects/vagrant/docker-swarm-vagrant/apps/swarmprom/prometheus/rules/swarm_task.rules.yml + rules: + - alert: task_high_cpu_usage_50 + expr: sum(rate(container_cpu_usage_seconds_total{container_label_com_docker_swarm_task_name=~".+"}[1m])) + BY (container_label_com_docker_swarm_task_name, container_label_com_docker_swarm_node_id) + * 100 > 50 + for: 1m + annotations: + description: '{{ $labels.container_label_com_docker_swarm_task_name }} on ''{{ + $labels.container_label_com_docker_swarm_node_id }}'' CPU usage is at {{ humanize + $value}}%.' + summary: CPU alert for Swarm task '{{ $labels.container_label_com_docker_swarm_task_name + }}' on '{{ $labels.container_label_com_docker_swarm_node_id }}' + - alert: task_high_memory_usage_1g + expr: sum(container_memory_rss{container_label_com_docker_swarm_task_name=~".+"}) + BY (container_label_com_docker_swarm_task_name, container_label_com_docker_swarm_node_id) > 1e+09 + for: 1m + annotations: + description: '{{ $labels.container_label_com_docker_swarm_task_name }} on ''{{ + $labels.container_label_com_docker_swarm_node_id }}'' memory usage is {{ humanize + $value}}.' 
+ summary: Memory alert for Swarm task '{{ $labels.container_label_com_docker_swarm_task_name + }}' on '{{ $labels.container_label_com_docker_swarm_node_id }}' diff --git a/playbooks/swarm/swarmprom.config.yml b/playbooks/swarm/swarmprom.config.yml new file mode 100644 index 0000000..7242c20 --- /dev/null +++ b/playbooks/swarm/swarmprom.config.yml @@ -0,0 +1,20 @@ +--- +- name: copy config file + hosts: supermaster + become: yes + tasks: + - name: copy caddyfile into /volume + copy: + src: caddy/Caddyfile + dest: /volume/config/caddy/Caddyfile + - name: copy caddyfile dockerd into /volume + copy: + src: dockerd-exporter/Caddyfile + dest: /volume/config/dockerd-exporter/Caddyfile + - name: copy prometeus rules + copy: + src: prometheus/rules/{{item}} + dest: /volume/config/prometheus/rules/{{item}} + with_items: + - swarm_node.rules.yml + - swarm_task.rules.yml \ No newline at end of file diff --git a/port-range b/port-range new file mode 100644 index 0000000..f70c80f --- /dev/null +++ b/port-range @@ -0,0 +1,19 @@ +6000-6050 = games +{ +6001 2048 +6002 sudoku +6003 mafia +6023 api mafia +6003 quiz ==> must change +6023 api quiz ==> must change +} + +8000-9000 = base +{ +4501 ls.igarson.app ==> must change 8010 +8011 api.igarson.app +8012 demo.igarson.app +8013 capitan.igarson.app +8014 igarson.app +6732 guardians.of.galaxy.igarson.app ==> must change 8015 +} \ No newline at end of file diff --git a/stacks/lvl1/stack.yml b/stacks/lvl1/stack.yml deleted file mode 100644 index 363f86d..0000000 --- a/stacks/lvl1/stack.yml +++ /dev/null @@ -1,62 +0,0 @@ -version: "3.7" - -networks: - mother: - external: true - - -services: - dns: - image: registry.vnfco.ir/library/ameersbn/bind:9.16.1-20200524 - deploy: - placement: - constraints: [node.role == manager] - update_config: - parallelism: 1 - delay: 10s - order: start-first - restart_policy: - condition: any - delay: 5s - max_attempts: 3 - window: 120s - volumes: - - /volume/dns:/data - environment: - - 
ROOT_PASSWORD=9197279882 - - WEBMIN_INIT_SSL_ENABLED=false - ports: - - "10000:10000/tcp" - - "53:53/tcp" - networks: - - mother - - # nginx: - # image: registry.vnfco.ir/library/ameersbn/bind:9.16.1-20200524 - # deploy: - # placement: - # constraints: - # - node.label.master==true - # update_config: - # parallelism: 1 - # delay: 10s - # order: start-stop - # restart_policy: - # condition: any - # delay: 5s - # max_attempts: 3 - # window: 120s - # volumes: - # - /volume/dns:/data - # environment: - # - ROOT_PASSWORD=9197279882 - # ports: - # - "10000:10000/tcp" - # - "53:53/tcp" - # - "53:53/udp" - # networks: - # - mother - - - - diff --git a/stacks/lvl1/swarmprom/.gitattributes b/stacks/lvl1/swarmprom/.gitattributes new file mode 100644 index 0000000..f0445c4 --- /dev/null +++ b/stacks/lvl1/swarmprom/.gitattributes @@ -0,0 +1,3 @@ +# Denote all files that are truly binary and should not be modified. +*.png binary +*.jpg binary diff --git a/stacks/lvl1/swarmprom/.gitignore b/stacks/lvl1/swarmprom/.gitignore new file mode 100644 index 0000000..e601e02 --- /dev/null +++ b/stacks/lvl1/swarmprom/.gitignore @@ -0,0 +1,17 @@ +# Binaries for programs and plugins +*.exe +*.dll +*.so +*.dylib + +# Test binary, build with `go test -c` +*.test + +# Output of the go coverage tool, specifically when used with LiteIDE +*.out + +# Project-local glide cache, RE: https://github.com/Masterminds/glide/issues/736 +.glide/ + +.idea/ +.DS_Store diff --git a/stacks/lvl1/swarmprom/.travis.yml b/stacks/lvl1/swarmprom/.travis.yml new file mode 100644 index 0000000..d936667 --- /dev/null +++ b/stacks/lvl1/swarmprom/.travis.yml @@ -0,0 +1,32 @@ +sudo: required + +services: + - docker + +before_install: + - curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo apt-key add - + - sudo add-apt-repository "deb [arch=amd64] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable" + - sudo apt-get update + - sudo apt-get -y install docker-ce + - sudo service docker restart + 
+script: + - cd prometheus && docker build -t stefanprodan/swarmprom-prometheus:$TRAVIS_BUILD_NUMBER . + - cd .. && cd node-exporter && docker build -t stefanprodan/swarmprom-node-exporter:$TRAVIS_BUILD_NUMBER . + - cd .. && cd alertmanager && docker build -t stefanprodan/swarmprom-alertmanager:$TRAVIS_BUILD_NUMBER . + - cd .. && cd grafana && docker build -t stefanprodan/swarmprom-grafana:$TRAVIS_BUILD_NUMBER . + +after_success: + - if [ -z "$DOCKER_USER" ]; then + echo "PR build, skipping Docker Hub push"; + else + docker login -u "$DOCKER_USER" -p "$DOCKER_PASS"; + docker tag stefanprodan/swarmprom-prometheus:$TRAVIS_BUILD_NUMBER stefanprodan/swarmprom-prometheus:v2.5.0; + docker push stefanprodan/swarmprom-prometheus:v2.5.0; + docker tag stefanprodan/swarmprom-node-exporter:$TRAVIS_BUILD_NUMBER stefanprodan/swarmprom-node-exporter:v0.16.0; + docker push stefanprodan/swarmprom-node-exporter:v0.16.0; + docker tag stefanprodan/swarmprom-alertmanager:$TRAVIS_BUILD_NUMBER stefanprodan/swarmprom-alertmanager:v0.15.3; + docker push stefanprodan/swarmprom-alertmanager:v0.15.3; + docker tag stefanprodan/swarmprom-grafana:$TRAVIS_BUILD_NUMBER stefanprodan/swarmprom-grafana:5.3.4; + docker push stefanprodan/swarmprom-grafana:5.3.4; + fi diff --git a/stacks/lvl1/swarmprom/LICENSE b/stacks/lvl1/swarmprom/LICENSE new file mode 100644 index 0000000..cc562a0 --- /dev/null +++ b/stacks/lvl1/swarmprom/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2017 Stefan Prodan + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be 
included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/stacks/lvl1/swarmprom/README.md b/stacks/lvl1/swarmprom/README.md new file mode 100644 index 0000000..d3774a7 --- /dev/null +++ b/stacks/lvl1/swarmprom/README.md @@ -0,0 +1,550 @@ +# swarmprom + +Swarmprom is a starter kit for Docker Swarm monitoring with [Prometheus](https://prometheus.io/), +[Grafana](http://grafana.org/), +[cAdvisor](https://github.com/google/cadvisor), +[Node Exporter](https://github.com/prometheus/node_exporter), +[Alert Manager](https://github.com/prometheus/alertmanager) +and [Unsee](https://github.com/cloudflare/unsee). 
+ +## Install + +Clone this repository and run the monitoring stack: + +```bash +$ git clone https://github.com/stefanprodan/swarmprom.git +$ cd swarmprom + +ADMIN_USER=admin \ +ADMIN_PASSWORD=admin \ +SLACK_URL=https://hooks.slack.com/services/TOKEN \ +SLACK_CHANNEL=devops-alerts \ +SLACK_USER=alertmanager \ +docker stack deploy -c docker-compose.yml mon +``` + +Prerequisites: + +* Docker CE 17.09.0-ce or Docker EE 17.06.2-ee-3 +* Swarm cluster with one manager and a worker node +* Docker engine experimental features enabled and metrics address set to `0.0.0.0:9323` + +Services: + +* prometheus (metrics database) `http://<swarm-ip>:9090` +* grafana (visualize metrics) `http://<swarm-ip>:3000` +* node-exporter (host metrics collector) +* cadvisor (containers metrics collector) +* dockerd-exporter (Docker daemon metrics collector, requires Docker experimental metrics-addr to be enabled) +* alertmanager (alerts dispatcher) `http://<swarm-ip>:9093` +* unsee (alert manager dashboard) `http://<swarm-ip>:9094` +* caddy (reverse proxy and basic auth provider for prometheus, alertmanager and unsee) + + +## Alternative install with Traefik and HTTPS + +If you have a Docker Swarm cluster with a global Traefik set up as described in [DockerSwarm.rocks](https://dockerswarm.rocks), you can deploy Swarmprom integrated with that global Traefik proxy. + +This way, each Swarmprom service will have its own domain, and each of them will be served using HTTPS, with certificates generated (and renewed) automatically. + +### Requisites + +These instructions assume you already have Traefik set up following that guide above, in short: + +* With automatic HTTPS certificate generation. +* A Docker Swarm network `traefik-public`. +* Filtering to only serve containers with a label `traefik.constraint-label=traefik-public`. 
+ +### Instructions + +* Clone this repository and enter into the directory: + +```bash +$ git clone https://github.com/stefanprodan/swarmprom.git +$ cd swarmprom +``` + +* Set and export an `ADMIN_USER` environment variable: + +```bash +export ADMIN_USER=admin +``` + +* Set and export an `ADMIN_PASSWORD` environment variable: + + +```bash +export ADMIN_PASSWORD=changethis +``` + +* Set and export a hashed version of the `ADMIN_PASSWORD` using `openssl`, it will be used by Traefik's HTTP Basic Auth for most of the services: + +```bash +export HASHED_PASSWORD=$(openssl passwd -apr1 $ADMIN_PASSWORD) +``` + +* You can check the contents with: + +```bash +echo $HASHED_PASSWORD +``` + +it will look like: + +``` +$apr1$89eqM5Ro$CxaFELthUKV21DpI3UTQO. +``` + +* Create and export an environment variable `DOMAIN`, e.g.: + +```bash +export DOMAIN=example.com +``` + +and make sure that the following sub-domains point to your Docker Swarm cluster IPs: + +* `grafana.example.com` +* `alertmanager.example.com` +* `unsee.example.com` +* `prometheus.example.com` + +(and replace `example.com` with your actual domain). + +**Note**: You can also use a subdomain, like `swarmprom.example.com`. Just make sure that the subdomains point to (at least one of) your cluster IPs. Or set up a wildcard subdomain (`*`). + +* If you are using Slack and want to integrate it, set the following environment variables: + +```bash +export SLACK_URL=https://hooks.slack.com/services/TOKEN +export SLACK_CHANNEL=devops-alerts +export SLACK_USER=alertmanager +``` + +**Note**: by using `export` when declaring all the environment variables above, the next command will be able to use them. 
+ +* Deploy the Traefik version of the stack: + + +```bash +docker stack deploy -c docker-compose.traefik.yml swarmprom +``` + +To test it, go to each URL: + +* `https://grafana.example.com` +* `https://alertmanager.example.com` +* `https://unsee.example.com` +* `https://prometheus.example.com` + + +## Setup Grafana + +Navigate to `http://<swarm-ip>:3000` and log in with user ***admin*** password ***admin***. +You can change the credentials in the compose file or +by supplying the `ADMIN_USER` and `ADMIN_PASSWORD` environment variables at stack deploy. + +Swarmprom Grafana is preconfigured with two dashboards and Prometheus as the default data source: + +* Name: Prometheus +* Type: Prometheus +* Url: http://prometheus:9090 +* Access: proxy + +After you log in, click on the home drop-down in the upper-left corner and you'll see the dashboards there. + +***Docker Swarm Nodes Dashboard*** + +![Nodes](https://raw.githubusercontent.com/stefanprodan/swarmprom/master/grafana/screens/swarmprom-nodes-dash-v3.png) + +URL: `http://<swarm-ip>:3000/dashboard/db/docker-swarm-nodes` + +This dashboard shows key metrics for monitoring the resource usage of your Swarm nodes and can be filtered by node ID: + +* Cluster up-time, number of nodes, number of CPUs, CPU idle gauge +* System load average graph, CPU usage graph by node +* Total memory, available memory gauge, total disk space and available storage gauge +* Memory usage graph by node (used and cached) +* I/O usage graph (read and write Bps) +* IOPS usage (read and write operations per second) and CPU IOWait +* Running containers graph by Swarm service and node +* Network usage graph (inbound Bps, outbound Bps) +* Nodes list (instance, node ID, node name) + +***Docker Swarm Services Dashboard*** + +![Nodes](https://raw.githubusercontent.com/stefanprodan/swarmprom/master/grafana/screens/swarmprom-services-dash-v3.png) + +URL: `http://<swarm-ip>:3000/dashboard/db/docker-swarm-services` + +This dashboard shows key metrics for monitoring the resource usage of 
your Swarm stacks and services, and can be filtered by node ID: + +* Number of nodes, stacks, services and running containers +* Swarm tasks graph by service name +* Health check graph (total health checks and failed checks) +* CPU usage graph by service and by container (top 10) +* Memory usage graph by service and by container (top 10) +* Network usage graph by service (received and transmitted) +* Cluster network traffic and IOPS graphs +* Docker engine container and network actions by node +* Docker engine list (version, node id, OS, kernel, graph driver) + +***Prometheus Stats Dashboard*** + +![Nodes](https://raw.githubusercontent.com/stefanprodan/swarmprom/master/grafana/screens/swarmprom-prometheus-dash-v3.png) + +URL: `http://<swarm-ip>:3000/dashboard/db/prometheus` + +* Uptime, local storage memory chunks and series +* CPU usage graph +* Memory usage graph +* Chunks to persist and persistence urgency graphs +* Chunks ops and checkpoint duration graphs +* Target scrapes, rule evaluation duration, samples ingested rate and scrape duration graphs + + +## Prometheus service discovery + +In order to collect metrics from Swarm nodes you need to deploy the exporters on each server. +Using global services you don't have to manually deploy the exporters. When you scale up your +cluster, Swarm will launch a cAdvisor, node-exporter and dockerd-exporter instance on the newly created nodes. +All you need is an automated way for Prometheus to reach these instances. + +Running Prometheus on the same overlay network as the exporter services allows you to use the DNS service +discovery. 
Using the exporters service name, you can configure DNS discovery: + +```yaml +scrape_configs: + - job_name: 'node-exporter' + dns_sd_configs: + - names: + - 'tasks.node-exporter' + type: 'A' + port: 9100 + - job_name: 'cadvisor' + dns_sd_configs: + - names: + - 'tasks.cadvisor' + type: 'A' + port: 8080 + - job_name: 'dockerd-exporter' + dns_sd_configs: + - names: + - 'tasks.dockerd-exporter' + type: 'A' + port: 9323 +``` + +When Prometheus runs the DNS lookup, Docker Swarm will return a list of IPs for each task. +Using these IPs, Prometheus will bypass the Swarm load-balancer and will be able to scrape each exporter +instance. + +The problem with this approach is that you will not be able to tell which exporter runs on which node. +Your Swarm nodes' real IPs are different from the exporters IPs since exporters IPs are dynamically +assigned by Docker and are part of the overlay network. +Swarm doesn't provide any records for the tasks DNS, besides the overlay IP. +If Swarm provides SRV records with the nodes hostname or IP, you can re-label the source +and overwrite the overlay IP with the real IP. + +In order to tell which host a node-exporter instance is running, I had to create a prom file inside +the node-exporter containing the hostname and the Docker Swarm node ID. + +When a node-exporter container starts `node-meta.prom` is generated with the following content: + +```bash +"node_meta{node_id=\"$NODE_ID\", node_name=\"$NODE_NAME\"} 1" +``` + +The node ID value is supplied via `{{.Node.ID}}` and the node name is extracted from the `/etc/hostname` +file that is mounted inside the node-exporter container. + +```yaml + node-exporter: + image: stefanprodan/swarmprom-node-exporter + environment: + - NODE_ID={{.Node.ID}} + volumes: + - /etc/hostname:/etc/nodename + command: + - '-collector.textfile.directory=/etc/node-exporter/' +``` + +Using the textfile command, you can instruct node-exporter to collect the `node_meta` metric. 
+Now that you have a metric containing the Docker Swarm node ID and name, you can use it in PromQL queries. + +Let's say you want to find the available memory on each node. Normally you would write something like this: + +``` +sum(node_memory_MemAvailable) by (instance) + +{instance="10.0.0.5:9100"} 889450496 +{instance="10.0.0.13:9100"} 1404162048 +{instance="10.0.0.15:9100"} 1406574592 +``` + +The above result is not very helpful since you can't tell what Swarm node is behind the instance IP. +So let's write that query taking into account the node_meta metric: + +```sql +sum(node_memory_MemAvailable * on(instance) group_left(node_id, node_name) node_meta) by (node_id, node_name) + +{node_id="wrdvtftteo0uaekmdq4dxrn14",node_name="swarm-manager-1"} 889450496 +{node_id="moggm3uaq8tax9ptr1if89pi7",node_name="swarm-worker-1"} 1404162048 +{node_id="vkdfx99mm5u4xl2drqhnwtnsv",node_name="swarm-worker-2"} 1406574592 +``` + +This is much better. Instead of overlay IPs, now I can see the actual Docker Swarm node IDs and hostnames. Knowing the hostname of your nodes is useful for alerting as well. + +You can define an alert when available memory reaches 10%. You will also receive the hostname in the alert message +and not some overlay IP that you can't correlate to an infrastructure item. + +Maybe you are wondering why you need the node ID if you have the hostname. The node ID will help you match +node-exporter instances to cAdvisor instances. All metrics exported by cAdvisor have a label named `container_label_com_docker_swarm_node_id`, +and this label can be used to filter container metrics by Swarm node. + +Let's write a query to find out how many containers are running on a Swarm node. +Knowing all the node IDs from the `node_meta` metric, you can define a filter with them in Grafana. 
+Assuming the filter is `$node_id`, the container count query should look like this: + +``` +count(rate(container_last_seen{container_label_com_docker_swarm_node_id=~"$node_id"}[5m])) +``` + +Another use case for node ID is filtering the metrics provided by the Docker engine daemon. +Docker engine doesn't have a label with the node ID attached on every metric, but there is a `swarm_node_info` +metric that has this label. If you want to find out the number of failed health checks on a Swarm node +you would write a query like this: + +``` +sum(engine_daemon_health_checks_failed_total * on(instance) group_left(node_id) swarm_node_info{node_id=~"$node_id"}) +``` + +For now the engine metrics are still experimental. If you want to use dockerd-exporter you have to enable +the experimental feature and set the metrics address to `0.0.0.0:9323`. + +If you are running Docker with systemd, create or edit the +/etc/systemd/system/docker.service.d/docker.conf file like so: + +``` +[Service] +ExecStart= +ExecStart=/usr/bin/dockerd \ + --storage-driver=overlay2 \ + --dns 8.8.4.4 --dns 8.8.8.8 \ + --experimental=true \ + --metrics-addr 0.0.0.0:9323 +``` + +Apply the config changes with `systemctl daemon-reload && systemctl restart docker` and +check if the docker_gwbridge ip address is 172.18.0.1: + +```bash +ip -o addr show docker_gwbridge +``` + +Replace 172.18.0.1 with your docker_gwbridge address in the compose file: + +```yaml + dockerd-exporter: + image: stefanprodan/caddy + environment: + - DOCKER_GWBRIDGE_IP=172.18.0.1 +``` + +Collecting Docker Swarm metrics with Prometheus is not a smooth process, and +because of `group_left`, queries tend to become more complex. +In the future I hope Swarm DNS will contain the SRV record for hostname and Docker engine +metrics will expose container metrics, replacing cAdvisor altogether. 
+ +## Configure Prometheus + +I've set the Prometheus retention period to 24h, you can change these values in the +compose file or using the env variable `PROMETHEUS_RETENTION`. + +```yaml + prometheus: + image: stefanprodan/swarmprom-prometheus + command: + - '-storage.tsdb.retention=24h' + deploy: + resources: + limits: + memory: 2048M + reservations: + memory: 1024M +``` + +When using host volumes you should ensure that Prometheus doesn't get scheduled on different nodes. You can +pin the Prometheus service on a specific host with placement constraints. + +```yaml + prometheus: + image: stefanprodan/swarmprom-prometheus + volumes: + - prometheus:/prometheus + deploy: + mode: replicated + replicas: 1 + placement: + constraints: + - node.labels.monitoring.role == prometheus +``` + +## Configure alerting + +The Prometheus swarmprom comes with the following alert rules: + +***Swarm Node CPU Usage*** + +Alerts when a node CPU usage goes over 80% for five minutes. + +``` +ALERT node_cpu_usage + IF 100 - (avg(irate(node_cpu{mode="idle"}[1m]) * on(instance) group_left(node_name) node_meta * 100) by (node_name)) > 80 + FOR 5m + LABELS { severity="warning" } + ANNOTATIONS { + summary = "CPU alert for Swarm node '{{ $labels.node_name }}'", + description = "Swarm node {{ $labels.node_name }} CPU usage is at {{ humanize $value}}%.", + } +``` +***Swarm Node Memory Alert*** + +Alerts when a node memory usage goes over 80% for five minutes. + +``` +ALERT node_memory_usage + IF sum(((node_memory_MemTotal - node_memory_MemAvailable) / node_memory_MemTotal) * on(instance) group_left(node_name) node_meta * 100) by (node_name) > 80 + FOR 5m + LABELS { severity="warning" } + ANNOTATIONS { + summary = "Memory alert for Swarm node '{{ $labels.node_name }}'", + description = "Swarm node {{ $labels.node_name }} memory usage is at {{ humanize $value}}%.", + } +``` +***Swarm Node Disk Alert*** + +Alerts when a node storage usage goes over 85% for five minutes. 
+ +``` +ALERT node_disk_usage + IF ((node_filesystem_size{mountpoint="/rootfs"} - node_filesystem_free{mountpoint="/rootfs"}) * 100 / node_filesystem_size{mountpoint="/rootfs"}) * on(instance) group_left(node_name) node_meta > 85 + FOR 5m + LABELS { severity="warning" } + ANNOTATIONS { + summary = "Disk alert for Swarm node '{{ $labels.node_name }}'", + description = "Swarm node {{ $labels.node_name }} disk usage is at {{ humanize $value}}%.", + } +``` + +***Swarm Node Disk Fill Rate Alert*** + +Alerts when a node's storage is predicted to run out of free space within six hours. + +``` +ALERT node_disk_fill_rate_6h + IF predict_linear(node_filesystem_free{mountpoint="/rootfs"}[1h], 6*3600) * on(instance) group_left(node_name) node_meta < 0 + FOR 1h + LABELS { severity="critical" } + ANNOTATIONS { + summary = "Disk fill alert for Swarm node '{{ $labels.node_name }}'", + description = "Swarm node {{ $labels.node_name }} disk is going to fill up in 6h.", + } +``` + +You can add alerts to the +[swarm_node](https://github.com/stefanprodan/swarmprom/blob/master/prometheus/rules/swarm_node.rules.yml) +and [swarm_task](https://github.com/stefanprodan/swarmprom/blob/master/prometheus/rules/swarm_task.rules.yml) +files and rerun stack deploy to update them. Because these files are mounted inside the Prometheus +container at run time as [Docker configs](https://docs.docker.com/engine/swarm/configs/), +you don't have to bundle them with the image. + +The Alertmanager swarmprom image is configured with the Slack receiver. 
+In order to receive alerts on Slack you have to provide the Slack API URL, +username and channel via environment variables: + +```yaml + alertmanager: + image: stefanprodan/swarmprom-alertmanager + environment: + - SLACK_URL=${SLACK_URL} + - SLACK_CHANNEL=${SLACK_CHANNEL} + - SLACK_USER=${SLACK_USER} +``` + +You can install the `stress` package with apt and test out the CPU alert; you should receive something like this: + +![Alerts](https://raw.githubusercontent.com/stefanprodan/swarmprom/master/grafana/screens/alertmanager-slack-v2.png) + +Cloudflare has made Unsee, a great dashboard for managing alerts. +Unsee can aggregate alerts from multiple Alertmanager instances, running either in HA mode or separately. +You can access unsee at `http://<swarm-ip>:9094` using the admin user/password set via compose up: + +![Unsee](https://raw.githubusercontent.com/stefanprodan/swarmprom/master/grafana/screens/unsee.png) + +## Monitoring applications and backend services + +You can extend swarmprom with special-purpose exporters for services like MongoDB, PostgreSQL, Kafka, +Redis and also instrument your own applications using the Prometheus client libraries. + +In order to scrape other services you need to attach those to the `mon_net` network so Prometheus +can reach them. Or you can attach the `mon_prometheus` service to the networks where your services are running. + +Once your services are reachable by Prometheus you can add the DNS name and port of those services to the +Prometheus config using the `JOBS` environment variable: + +```yaml + prometheus: + image: stefanprodan/swarmprom-prometheus + environment: + - JOBS=mongo-exporter:9216 kafka-exporter:9216 redis-exporter:9216 +``` + +## Monitoring production systems + +The swarmprom project is meant as a starting point in developing your own monitoring solution. Before running this +in production you should consider building and publishing your own Prometheus, node exporter and alert manager +images. 
Docker Swarm doesn't play well with locally built images, the first step would be to setup a secure Docker +registry that your Swarm has access to and push the images there. Your CI system should assign version tags to each +image. Don't rely on the latest tag for continuous deployments, Prometheus will soon reach v2 and the data store +will not be backwards compatible with v1.x. + +Another thing you should consider is having redundancy for Prometheus and alert manager. +You could run them as a service with two replicas pinned on different nodes, or even better, +use a service like Weave Cloud Cortex to ship your metrics outside of your current setup. +You can use Weave Cloud not only as a backup of your +metrics database but you can also define alerts and use it as a data source for your Grafana dashboards. +Having the alerting and monitoring system hosted on a different platform other than your production +is good practice that will allow you to react quickly and efficiently when a major disaster strikes. + +Swarmprom comes with built-in [Weave Cloud](https://www.weave.works/product/cloud/) integration, +what you need to do is run the weave-compose stack with your Weave service token: + +```bash +TOKEN= \ +ADMIN_USER=admin \ +ADMIN_PASSWORD=admin \ +docker stack deploy -c weave-compose.yml mon +``` + +This will deploy Weave Scope and Prometheus with Weave Cortex as remote write. +The local retention is set to 24h so even if your internet connection drops you'll not lose data +as Prometheus will retry pushing data to Weave Cloud when the connection is up again. + +You can define alerts and notifications routes in Weave Cloud in the same way you would do with alert manager. 
+ +To use Grafana with Weave Cloud you have to reconfigure the Prometheus data source like this: + +* Name: Prometheus +* Type: Prometheus +* Url: https://cloud.weave.works/api/prom +* Access: proxy +* Basic auth: use your service token as password, the user value is ignored + +Weave Scope automatically generates a map of your application, enabling you to intuitively understand, +monitor, and control your microservices-based application. +You can view metrics, tags and metadata of the running processes, containers and hosts. +Scope offers remote access to the Swarm’s nodes and containers, making it easy to diagnose issues in real time. + +![Scope](https://raw.githubusercontent.com/stefanprodan/swarmprom/master/grafana/screens/weave-scope.png) + +![Scope Hosts](https://raw.githubusercontent.com/stefanprodan/swarmprom/master/grafana/screens/weave-scope-hosts-v2.png) diff --git a/stacks/lvl1/swarmprom/alertmanager/Dockerfile b/stacks/lvl1/swarmprom/alertmanager/Dockerfile new file mode 100644 index 0000000..972598d --- /dev/null +++ b/stacks/lvl1/swarmprom/alertmanager/Dockerfile @@ -0,0 +1,7 @@ +FROM prom/alertmanager:v0.15.3 + +COPY conf /etc/alertmanager/ + +ENTRYPOINT [ "/etc/alertmanager/docker-entrypoint.sh" ] +CMD [ "--config.file=/etc/alertmanager/alertmanager.yml", \ + "--storage.path=/alertmanager" ] diff --git a/stacks/lvl1/swarmprom/alertmanager/conf/alertmanager.yml b/stacks/lvl1/swarmprom/alertmanager/conf/alertmanager.yml new file mode 100644 index 0000000..4859ab9 --- /dev/null +++ b/stacks/lvl1/swarmprom/alertmanager/conf/alertmanager.yml @@ -0,0 +1,11 @@ +route: + receiver: 'slack' + +receivers: + - name: 'slack' + slack_configs: + - send_resolved: true + text: "{{ .CommonAnnotations.description }}" + #username: # + #channel: # + #api_url: # diff --git a/stacks/lvl1/swarmprom/alertmanager/conf/docker-entrypoint.sh b/stacks/lvl1/swarmprom/alertmanager/conf/docker-entrypoint.sh new file mode 100755 index 0000000..41e9d96 --- /dev/null +++ 
b/stacks/lvl1/swarmprom/alertmanager/conf/docker-entrypoint.sh @@ -0,0 +1,12 @@ +#!/bin/sh -e + +cat /etc/alertmanager/alertmanager.yml |\ + sed "s@#api_url: #@api_url: '$SLACK_URL'@g" |\ + sed "s@#channel: #@channel: '#$SLACK_CHANNEL'@g" |\ + sed "s@#username: #@username: '$SLACK_USER'@g" > /tmp/alertmanager.yml + +mv /tmp/alertmanager.yml /etc/alertmanager/alertmanager.yml + +set -- /bin/alertmanager "$@" + +exec "$@" diff --git a/stacks/lvl1/swarmprom/caddy/Caddyfile b/stacks/lvl1/swarmprom/caddy/Caddyfile new file mode 100644 index 0000000..e1b46ec --- /dev/null +++ b/stacks/lvl1/swarmprom/caddy/Caddyfile @@ -0,0 +1,40 @@ +:9090 { + basicauth / {$ADMIN_USER} {$ADMIN_PASSWORD} + proxy / prometheus:9090 { + transparent + } + + errors stderr + tls off +} + +:9093 { + basicauth / {$ADMIN_USER} {$ADMIN_PASSWORD} + proxy / alertmanager:9093 { + transparent + } + + errors stderr + tls off +} + +:9094 { + basicauth / {$ADMIN_USER} {$ADMIN_PASSWORD} + proxy / unsee:8080 { + transparent + } + + errors stderr + tls off +} + +:3000 { + proxy / grafana:3000 { + transparent + websocket + } + + errors stderr + tls off +} + diff --git a/stacks/lvl1/swarmprom/docker-compose.traefik.yml b/stacks/lvl1/swarmprom/docker-compose.traefik.yml new file mode 100644 index 0000000..dab1afe --- /dev/null +++ b/stacks/lvl1/swarmprom/docker-compose.traefik.yml @@ -0,0 +1,236 @@ +version: "3.3" + +networks: + net: + driver: overlay + attachable: true + traefik-public: + external: true + +volumes: + prometheus: {} + grafana: {} + alertmanager: {} + +configs: + dockerd_config: + file: ./dockerd-exporter/Caddyfile + node_rules: + file: ./prometheus/rules/swarm_node.rules.yml + task_rules: + file: ./prometheus/rules/swarm_task.rules.yml + +services: + dockerd-exporter: + image: stefanprodan/caddy + networks: + - net + environment: + - DOCKER_GWBRIDGE_IP=172.18.0.1 + configs: + - source: dockerd_config + target: /etc/caddy/Caddyfile + deploy: + mode: global + resources: + limits: + memory: 128M + 
reservations: + memory: 64M + + cadvisor: + image: google/cadvisor + networks: + - net + command: -logtostderr -docker_only + volumes: + - /var/run/docker.sock:/var/run/docker.sock:ro + - /:/rootfs:ro + - /var/run:/var/run + - /sys:/sys:ro + - /var/lib/docker/:/var/lib/docker:ro + deploy: + mode: global + resources: + limits: + memory: 128M + reservations: + memory: 64M + + grafana: + image: stefanprodan/swarmprom-grafana:5.3.4 + networks: + - default + - net + - traefik-public + environment: + - GF_SECURITY_ADMIN_USER=${ADMIN_USER:-admin} + - GF_SECURITY_ADMIN_PASSWORD=${ADMIN_PASSWORD:-admin} + - GF_USERS_ALLOW_SIGN_UP=false + #- GF_SERVER_ROOT_URL=${GF_SERVER_ROOT_URL:-localhost} + #- GF_SMTP_ENABLED=${GF_SMTP_ENABLED:-false} + #- GF_SMTP_FROM_ADDRESS=${GF_SMTP_FROM_ADDRESS:-grafana@test.com} + #- GF_SMTP_FROM_NAME=${GF_SMTP_FROM_NAME:-Grafana} + #- GF_SMTP_HOST=${GF_SMTP_HOST:-smtp:25} + #- GF_SMTP_USER=${GF_SMTP_USER} + #- GF_SMTP_PASSWORD=${GF_SMTP_PASSWORD} + volumes: + - grafana:/var/lib/grafana + deploy: + mode: replicated + replicas: 1 + placement: + constraints: + - node.role == manager + resources: + limits: + memory: 128M + reservations: + memory: 64M + labels: + - traefik.enable=true + - traefik.docker.network=traefik-public + - traefik.constraint-label=traefik-public + - traefik.http.routers.swarmprom-grafana-http.rule=Host(`grafana.${DOMAIN?Variable DOMAIN not set}`) + - traefik.http.routers.swarmprom-grafana-http.entrypoints=http + - traefik.http.routers.swarmprom-grafana-http.middlewares=https-redirect + - traefik.http.routers.swarmprom-grafana-https.rule=Host(`grafana.${DOMAIN?Variable DOMAIN not set}`) + - traefik.http.routers.swarmprom-grafana-https.entrypoints=https + - traefik.http.routers.swarmprom-grafana-https.tls=true + - traefik.http.routers.swarmprom-grafana-https.tls.certresolver=le + - traefik.http.services.swarmprom-grafana.loadbalancer.server.port=3000 + + alertmanager: + image: stefanprodan/swarmprom-alertmanager:v0.14.0 + 
networks: + - default + - net + - traefik-public + environment: + - SLACK_URL=${SLACK_URL:-https://hooks.slack.com/services/TOKEN} + - SLACK_CHANNEL=${SLACK_CHANNEL:-general} + - SLACK_USER=${SLACK_USER:-alertmanager} + command: + - '--config.file=/etc/alertmanager/alertmanager.yml' + - '--storage.path=/alertmanager' + volumes: + - alertmanager:/alertmanager + deploy: + mode: replicated + replicas: 1 + placement: + constraints: + - node.role == manager + resources: + limits: + memory: 128M + reservations: + memory: 64M + labels: + - traefik.enable=true + - traefik.docker.network=traefik-public + - traefik.constraint-label=traefik-public + - traefik.http.routers.swarmprom-alertmanager-http.rule=Host(`alertmanager.${DOMAIN?Variable DOMAIN not set}`) + - traefik.http.routers.swarmprom-alertmanager-http.entrypoints=http + - traefik.http.routers.swarmprom-alertmanager-http.middlewares=https-redirect + - traefik.http.routers.swarmprom-alertmanager-https.rule=Host(`alertmanager.${DOMAIN?Variable DOMAIN not set}`) + - traefik.http.routers.swarmprom-alertmanager-https.entrypoints=https + - traefik.http.routers.swarmprom-alertmanager-https.tls=true + - traefik.http.routers.swarmprom-alertmanager-https.tls.certresolver=le + - traefik.http.services.swarmprom-alertmanager.loadbalancer.server.port=9093 + - traefik.http.middlewares.swarmprom-alertmanager-auth.basicauth.users=${ADMIN_USER?Variable ADMIN_USER not set}:${HASHED_PASSWORD?Variable HASHED_PASSWORD not set} + - traefik.http.routers.swarmprom-alertmanager-https.middlewares=swarmprom-alertmanager-auth + + unsee: + image: cloudflare/unsee:v0.8.0 + networks: + - default + - net + - traefik-public + environment: + - "ALERTMANAGER_URIS=default:http://alertmanager:9093" + deploy: + mode: replicated + replicas: 1 + labels: + - traefik.enable=true + - traefik.docker.network=traefik-public + - traefik.constraint-label=traefik-public + - traefik.http.routers.swarmprom-unsee-http.rule=Host(`unsee.${DOMAIN?Variable DOMAIN not set}`) 
+ - traefik.http.routers.swarmprom-unsee-http.entrypoints=http + - traefik.http.routers.swarmprom-unsee-http.middlewares=https-redirect + - traefik.http.routers.swarmprom-unsee-https.rule=Host(`unsee.${DOMAIN?Variable DOMAIN not set}`) + - traefik.http.routers.swarmprom-unsee-https.entrypoints=https + - traefik.http.routers.swarmprom-unsee-https.tls=true + - traefik.http.routers.swarmprom-unsee-https.tls.certresolver=le + - traefik.http.services.swarmprom-unsee.loadbalancer.server.port=8080 + - traefik.http.middlewares.swarmprom-unsee-auth.basicauth.users=${ADMIN_USER?Variable ADMIN_USER not set}:${HASHED_PASSWORD?Variable HASHED_PASSWORD not set} + - traefik.http.routers.swarmprom-unsee-https.middlewares=swarmprom-unsee-auth + + node-exporter: + image: stefanprodan/swarmprom-node-exporter:v0.16.0 + networks: + - net + environment: + - NODE_ID={{.Node.ID}} + volumes: + - /proc:/host/proc:ro + - /sys:/host/sys:ro + - /:/rootfs:ro + - /etc/hostname:/etc/nodename + command: + - '--path.sysfs=/host/sys' + - '--path.procfs=/host/proc' + - '--collector.textfile.directory=/etc/node-exporter/' + - '--collector.filesystem.ignored-mount-points=^/(sys|proc|dev|host|etc)($$|/)' + - '--no-collector.ipvs' + deploy: + mode: global + resources: + limits: + memory: 128M + reservations: + memory: 64M + + prometheus: + image: stefanprodan/swarmprom-prometheus:v2.5.0 + networks: + - default + - net + - traefik-public + command: + - '--config.file=/etc/prometheus/prometheus.yml' + - '--storage.tsdb.path=/prometheus' + - '--storage.tsdb.retention=${PROMETHEUS_RETENTION:-24h}' + volumes: + - prometheus:/prometheus + configs: + - source: node_rules + target: /etc/prometheus/swarm_node.rules.yml + - source: task_rules + target: /etc/prometheus/swarm_task.rules.yml + deploy: + mode: replicated + replicas: 1 + placement: + constraints: + - node.role == manager + resources: + limits: + memory: 2048M + reservations: + memory: 128M + labels: + - traefik.enable=true + - 
traefik.docker.network=traefik-public + - traefik.constraint-label=traefik-public + - traefik.http.routers.swarmprom-prometheus-http.rule=Host(`prometheus.${DOMAIN?Variable DOMAIN not set}`) + - traefik.http.routers.swarmprom-prometheus-http.entrypoints=http + - traefik.http.routers.swarmprom-prometheus-http.middlewares=https-redirect + - traefik.http.routers.swarmprom-prometheus-https.rule=Host(`prometheus.${DOMAIN?Variable DOMAIN not set}`) + - traefik.http.routers.swarmprom-prometheus-https.entrypoints=https + - traefik.http.routers.swarmprom-prometheus-https.tls=true + - traefik.http.routers.swarmprom-prometheus-https.tls.certresolver=le + - traefik.http.services.swarmprom-prometheus.loadbalancer.server.port=9090 + - traefik.http.middlewares.swarmprom-prometheus-auth.basicauth.users=${ADMIN_USER?Variable ADMIN_USER not set}:${HASHED_PASSWORD?Variable HASHED_PASSWORD not set} + - traefik.http.routers.swarmprom-prometheus-https.middlewares=swarmprom-prometheus-auth diff --git a/stacks/lvl1/swarmprom/docker-compose.yml b/stacks/lvl1/swarmprom/docker-compose.yml new file mode 100644 index 0000000..e45aec4 --- /dev/null +++ b/stacks/lvl1/swarmprom/docker-compose.yml @@ -0,0 +1,201 @@ +version: "3.3" + +networks: + mother: + external: true + + +configs: + caddy_config: + external: true + dockerd_config: + external: true + node_rules: + external: true + task_rules: + external: true + +services: + dockerd-exporter: + image: registry.vnfco.ir/library/stefanprodan/caddy + networks: + - mother + environment: + - DOCKER_GWBRIDGE_IP=172.18.0.1 + configs: + - source: dockerd_config + target: /etc/caddy/Caddyfile + deploy: + mode: global + resources: + limits: + memory: 64M + reservations: + memory: 32M + + cadvisor: + image: registry.vnfco.ir/library/google/cadvisor + networks: + - mother + command: -logtostderr -docker_only + volumes: + - /var/run/docker.sock:/var/run/docker.sock:ro + - /:/rootfs:ro + - /var/run:/var/run + - /sys:/sys:ro + - 
/var/lib/docker/:/var/lib/docker:ro + deploy: + mode: global + resources: + limits: + memory: 64M + reservations: + memory: 32M + + grafana: + image: registry.vnfco.ir/library/stefanprodan/swarmprom-grafana:5.3.4 + networks: + - mother + environment: + - GF_SECURITY_ADMIN_USER=nerdguy + - GF_SECURITY_ADMIN_PASSWORD=RbBmIu1LCgIqKcMmopc1Ie21+1OKsX1ktNCAeiViNIb6HAKkzjJ+GhZlLysItwgm + - GF_USERS_ALLOW_SIGN_UP=false + #- GF_SERVER_ROOT_URL=${GF_SERVER_ROOT_URL:-localhost} + #- GF_SMTP_ENABLED=${GF_SMTP_ENABLED:-false} + #- GF_SMTP_FROM_ADDRESS=${GF_SMTP_FROM_ADDRESS:-grafana@test.com} + #- GF_SMTP_FROM_NAME=${GF_SMTP_FROM_NAME:-Grafana} + #- GF_SMTP_HOST=${GF_SMTP_HOST:-smtp:25} + #- GF_SMTP_USER=${GF_SMTP_USER} + #- GF_SMTP_PASSWORD=${GF_SMTP_PASSWORD} + volumes: + - /volume/grafana:/var/lib/grafana + deploy: + mode: replicated + replicas: 1 + placement: + constraints: + - node.role == manager + resources: + limits: + memory: 128M + reservations: + memory: 32M + + alertmanager: + image: registry.vnfco.ir/library/stefanprodan/swarmprom-alertmanager:v0.14.0 + networks: + - mother + environment: + - SLACK_URL=https://hooks.slack.com/services/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 + - SLACK_CHANNEL=alert + - SLACK_USER=alertmanager + command: + - '--config.file=/etc/alertmanager/alertmanager.yml' + - '--storage.path=/alertmanager' + volumes: + - /volume/alertmanager:/alertmanager + deploy: + mode: replicated + replicas: 1 + placement: + constraints: + - node.role == manager + resources: + limits: + memory: 64M + reservations: + memory: 32M + + unsee: + image: registry.vnfco.ir/library/cloudflare/unsee:v0.8.0 + networks: + - mother + environment: + - "ALERTMANAGER_URIS=default:http://alertmanager:9093" + deploy: + mode: replicated + replicas: 1 + + node-exporter: + image: registry.vnfco.ir/library/stefanprodan/swarmprom-node-exporter:v0.16.0 + networks: + - mother + environment: + - NODE_ID={{.Node.ID}} + volumes: + - /proc:/host/proc:ro + - /sys:/host/sys:ro + - 
/:/rootfs:ro + - /etc/hostname:/etc/nodename + command: + - '--path.sysfs=/host/sys' + - '--path.procfs=/host/proc' + - '--collector.textfile.directory=/etc/node-exporter/' + - '--collector.filesystem.ignored-mount-points=^/(sys|proc|dev|host|etc)($$|/)' + - '--no-collector.ipvs' + deploy: + mode: global + resources: + limits: + memory: 64M + reservations: + memory: 32M + + prometheus: + image: registry.vnfco.ir/library/stefanprodan/swarmprom-prometheus:v2.5.0 + networks: + - mother + command: + - '--config.file=/etc/prometheus/prometheus.yml' + - '--storage.tsdb.path=/prometheus' + - '--storage.tsdb.retention=${PROMETHEUS_RETENTION:-24h}' + volumes: + - /volume/prometheus:/prometheus + configs: + - source: node_rules + target: /etc/prometheus/swarm_node.rules.yml + - source: task_rules + target: /etc/prometheus/swarm_task.rules.yml + deploy: + mode: replicated + replicas: 1 + placement: + constraints: + - node.role == manager + resources: + limits: + memory: 512M + reservations: + memory: 128M + + caddy: + image: registry.vnfco.ir/library/stefanprodan/caddy + ports: + - "3000:3000" + - "9090:9090" + - "9093:9093" + - "9094:9094" + networks: + - mother + environment: + - ADMIN_USER=nerdguy + - ADMIN_PASSWORD=RbBmIu1LCgIqKcMmopc1Ie21+1OKsX1ktNCAeiViNIb6HAKkzjJ+GhZlLysItwgm + configs: + - source: caddy_config + target: /etc/caddy/Caddyfile + deploy: + mode: replicated + replicas: 1 + placement: + constraints: + - node.role == manager + resources: + limits: + memory: 128M + reservations: + memory: 64M + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:3000"] + interval: 5s + timeout: 1s + retries: 5 diff --git a/stacks/lvl1/swarmprom/dockerd-exporter/Caddyfile b/stacks/lvl1/swarmprom/dockerd-exporter/Caddyfile new file mode 100644 index 0000000..5036d47 --- /dev/null +++ b/stacks/lvl1/swarmprom/dockerd-exporter/Caddyfile @@ -0,0 +1,8 @@ +:9323 { + proxy / {$DOCKER_GWBRIDGE_IP}:9323 { + transparent + } + + errors stderr + tls off +} diff --git 
a/stacks/lvl1/swarmprom/grafana/.dockerignore b/stacks/lvl1/swarmprom/grafana/.dockerignore new file mode 100644 index 0000000..2b0da58 --- /dev/null +++ b/stacks/lvl1/swarmprom/grafana/.dockerignore @@ -0,0 +1 @@ +screens/ diff --git a/stacks/lvl1/swarmprom/grafana/Dockerfile b/stacks/lvl1/swarmprom/grafana/Dockerfile new file mode 100644 index 0000000..1e6d7df --- /dev/null +++ b/stacks/lvl1/swarmprom/grafana/Dockerfile @@ -0,0 +1,10 @@ +FROM grafana/grafana:5.3.4 +# https://hub.docker.com/r/grafana/grafana/tags/ + +COPY datasources /etc/grafana/provisioning/datasources/ +COPY swarmprom_dashboards.yml /etc/grafana/provisioning/dashboards/ +COPY dashboards /etc/grafana/dashboards/ + +ENV GF_SECURITY_ADMIN_PASSWORD=admin \ + GF_SECURITY_ADMIN_USER=admin \ + GF_PATHS_PROVISIONING=/etc/grafana/provisioning/ diff --git a/stacks/lvl1/swarmprom/grafana/dashboards/swarmprom-nodes-dash.json b/stacks/lvl1/swarmprom/grafana/dashboards/swarmprom-nodes-dash.json new file mode 100644 index 0000000..ef5cf84 --- /dev/null +++ b/stacks/lvl1/swarmprom/grafana/dashboards/swarmprom-nodes-dash.json @@ -0,0 +1,2064 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "description": "Docker Swarm nodes metrics", + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "iteration": 1547535746076, + "links": [], + "panels": [ + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "Prometheus", + "decimals": 1, + "format": "s", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 4, + "w": 6, + "x": 0, + "y": 0 + }, + "hideTimeOverride": true, + "id": 2, + "interval": 
null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "topk(1, sum((node_time_seconds - node_boot_time_seconds) * on(instance) group_left(node_name) node_meta{node_id=~\"$node_id\"}) by (node_name))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 2 + } + ], + "thresholds": "", + "timeFrom": "1m", + "timeShift": null, + "title": "Uptime", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": null, + "decimals": 0, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 4, + "w": 6, + "x": 6, + "y": 0 + }, + "id": 1, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 
0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "count(node_meta * on(instance) group_left(node_name) node_meta{node_id=~\"$node_id\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 20 + } + ], + "thresholds": "", + "title": "Nodes", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": null, + "decimals": 0, + "format": "short", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 4, + "w": 6, + "x": 12, + "y": 0 + }, + "hideTimeOverride": true, + "id": 4, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "count(node_cpu_seconds_total{mode=\"idle\"} * on(instance) group_left(node_name) node_meta{node_id=~\"$node_id\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 2 + } + ], + "thresholds": "", + "timeFrom": "1m", + "timeShift": null, + "title": "CPUs", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + 
"text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": null, + "decimals": null, + "format": "percent", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": true, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 4, + "w": 6, + "x": 18, + "y": 0 + }, + "hideTimeOverride": true, + "id": 11, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(irate(node_cpu_seconds_total{mode=\"idle\"}[$interval]) * on(instance) group_left(node_name) node_meta{node_id=~\"$node_id\"}) * 100 / count(node_cpu_seconds_total{mode=\"user\"} * on(instance) group_left(node_name) node_meta{node_id=~\"$node_id\"}) ", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 2 + } + ], + "thresholds": "10,25,100", + "timeFrom": "1m", + "timeShift": null, + "title": "CPU Idle", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "decimals": 2, + "fill": 1, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 4 + }, + "id": 13, + "legend": { + "alignAsTable": 
true, + "avg": true, + "current": true, + "hideEmpty": false, + "hideZero": false, + "max": true, + "min": true, + "rightSide": true, + "show": false, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "node_load5 * on(instance) group_left(node_name) node_meta{node_id=~\"$node_id\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "load5 {{node_name}}", + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "System Load by Node", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "decimals": 2, + "fill": 1, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 4 + }, + "id": 14, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, + "min": true, + "rightSide": true, + "show": false, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "100 - 
(avg(irate(node_cpu_seconds_total{mode=\"idle\"}[$interval]) * on(instance) group_left(node_name) node_meta{node_id=~\"$node_id\"} * 100) by (node_name))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{node_name}}", + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "CPU Usage by Node", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "percent", + "label": null, + "logBase": 1, + "max": "100", + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": null, + "decimals": 1, + "format": "decbytes", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 4, + "w": 3, + "x": 0, + "y": 11 + }, + "hideTimeOverride": true, + "id": 3, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(node_memory_MemTotal_bytes * on(instance) 
group_left(node_name) node_meta{node_id=~\"$node_id\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 20 + } + ], + "thresholds": "", + "timeFrom": null, + "timeShift": null, + "title": "Total Memory", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": null, + "format": "percent", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": true, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 3, + "y": 11 + }, + "id": 8, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum((node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes) * on(instance) group_left(node_name) node_meta{node_id=~\"$node_id\"} * 100) / count(node_meta * on(instance) group_left(node_name) node_meta{node_id=~\"$node_id\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 20 + } + ], + "thresholds": "10,25,100", + "title": "Available Memory", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + }, 
+ { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": null, + "decimals": 1, + "format": "decbytes", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 4, + "w": 3, + "x": 7, + "y": 11 + }, + "hideTimeOverride": true, + "id": 22, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum((node_memory_SwapTotal_bytes - node_memory_SwapFree_bytes) * on(instance) group_left(node_name) node_meta{node_id=~\"$node_id\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 20 + } + ], + "thresholds": "", + "timeFrom": null, + "timeShift": null, + "title": "Total swap memory used", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": null, + "format": "percent", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": true, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 10, + "y": 11 + }, + "id": 
23, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(((node_memory_SwapTotal_bytes - node_memory_SwapFree_bytes) / node_memory_SwapTotal_bytes) * on(instance) group_left(node_name) node_meta{node_id=~\"$node_id\"} * 100) / count(node_meta * on(instance) group_left(node_name) node_meta{node_id=~\"$node_id\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 20 + } + ], + "thresholds": "5,10,100", + "title": "Used swap memory", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": null, + "format": "percent", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": true, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 4, + "w": 3, + "x": 14, + "y": 11 + }, + "id": 24, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": 
"null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(((node_memory_SwapTotal_bytes - node_memory_SwapFree_bytes) / node_memory_MemTotal_bytes) * on(instance) group_left(node_name) node_meta{node_id=~\"$node_id\"} * 100) / count(node_meta * on(instance) group_left(node_name) node_meta{node_id=~\"$node_id\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 20 + } + ], + "thresholds": "5,10,100", + "title": "Swap used / total RAM memory ratio", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": null, + "decimals": 1, + "format": "decbytes", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 4, + "w": 3, + "x": 17, + "y": 11 + }, + "hideTimeOverride": true, + "id": 9, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(node_filesystem_size_bytes{mountpoint=\"/rootfs\"} * on(instance) 
group_left(node_name) node_meta{node_id=~\"$node_id\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 20 + } + ], + "thresholds": "", + "timeFrom": null, + "timeShift": null, + "title": "Total Disk Space", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": null, + "format": "percent", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": true, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 20, + "y": 11 + }, + "id": 10, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum((node_filesystem_free_bytes{mountpoint=\"/rootfs\"} / node_filesystem_size_bytes{mountpoint=\"/rootfs\"}) * on(instance) group_left(node_name) node_meta{node_id=~\"$node_id\"} * 100) / count(node_meta * on(instance) group_left(node_name) node_meta{node_id=~\"$node_id\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 20 + } + ], + "thresholds": "10,25,100", + "title": "Available Disk Space", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", 
+ "value": "null" + } + ], + "valueName": "avg" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 15 + }, + "id": 15, + "legend": { + "alignAsTable": true, + "avg": true, + "current": false, + "max": true, + "min": true, + "rightSide": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum((node_memory_MemTotal_bytes - node_memory_MemFree_bytes - node_memory_Cached_bytes - node_memory_Buffers_bytes - node_memory_Slab_bytes) * on(instance) group_left(node_name) node_meta{node_id=~\"$node_id\"}) by (node_name)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Used {{node_name}}", + "refId": "A", + "step": 2 + }, + { + "expr": "sum(node_memory_Cached * on(instance) group_left(node_name) node_meta{node_id=~\"$node_id\"}) by (node_name)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Cached {{node_name}}", + "refId": "B", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Memory usage by Node", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "decbytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": 
null, + "fill": 1, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 22 + }, + "id": 21, + "legend": { + "alignAsTable": true, + "avg": true, + "current": false, + "max": true, + "min": true, + "rightSide": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum((node_memory_SwapTotal_bytes - node_memory_SwapFree_bytes) * on(instance) group_left(node_name) node_meta{node_id=~\"$node_id\"}) by (node_name)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Used {{node_name}}", + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Swap memory usage by Node", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "decbytes", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "decimals": 2, + "fill": 1, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 29 + }, + "id": 16, + "legend": { + "alignAsTable": true, + "avg": true, + "current": false, + "max": true, + "min": true, + "rightSide": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + 
"seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(irate(node_disk_read_bytes_total[$interval]) * on(instance) group_left(node_name) node_meta{node_id=~\"$node_id\"}) by (node_name)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Read {{node_name}}", + "refId": "A", + "step": 2 + }, + { + "expr": "sum(irate(node_disk_written_bytes_total[$interval]) * on(instance) group_left(node_name) node_meta{node_id=~\"$node_id\"}) by (node_name)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Written {{node_name}}", + "refId": "B", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Disk I/O by Node", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "decimals": 2, + "fill": 1, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 36 + }, + "id": 18, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "rightSide": true, + "show": false, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(irate(node_disk_reads_completed_total[$interval]) * on(instance) 
group_left(node_name) node_meta{node_id=~\"$node_id\"}) by (node_name)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Reads {{node_name}}", + "refId": "A", + "step": 2 + }, + { + "expr": "sum(irate(node_disk_writes_completed_total[$interval]) * on(instance) group_left(node_name) node_meta{node_id=~\"$node_id\"}) by (node_name)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Writes {{node_name}}", + "refId": "B", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "IOPS by Node", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "decimals": 2, + "fill": 1, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 36 + }, + "id": 19, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, + "min": true, + "rightSide": true, + "show": false, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "(avg(irate(node_cpu_seconds_total{mode=\"iowait\"}[$interval]) * on(instance) group_left(node_name) node_meta{node_id=~\"$node_id\"} * 100) by (node_name))", + "format": "time_series", + "intervalFactor": 2, + 
"legendFormat": "{{node_name}}", + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "CPU IO Wait by Node", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "percent", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "decimals": 0, + "fill": 3, + "gridPos": { + "h": 7, + "w": 18, + "x": 0, + "y": 43 + }, + "id": 12, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(container_last_seen{container_label_com_docker_swarm_node_id=~\"$node_id\"}[5m])) by (container_label_com_docker_swarm_service_name)", + "format": "time_series", + "intervalFactor": 10, + "legendFormat": "{{ container_label_com_docker_swarm_service_name }}", + "refId": "A", + "step": 10 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Running Containers by Service", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": 
true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": null, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 7, + "w": 6, + "x": 18, + "y": 43 + }, + "id": 7, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "tableColumn": "", + "targets": [ + { + "expr": "count(rate(container_last_seen{container_label_com_docker_swarm_node_id=~\"$node_id\"}[5m])) ", + "format": "time_series", + "intervalFactor": 2, + "refId": "A", + "step": 20 + } + ], + "thresholds": "", + "title": "Total Containers", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 50 + }, + "id": 17, + "legend": { + "alignAsTable": true, + "avg": true, + "current": false, 
+ "max": true, + "min": true, + "rightSide": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(container_network_receive_bytes_total{container_label_com_docker_swarm_node_id=~\"$node_id\"}[$interval]) * on(container_label_com_docker_swarm_node_id) group_left(node_name) node_meta) by (node_name)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "IN {{node_name}}", + "refId": "A", + "step": 2 + }, + { + "expr": "- sum(rate(container_network_transmit_bytes_total{container_label_com_docker_swarm_node_id=~\"$node_id\"}[$interval]) * on(container_label_com_docker_swarm_node_id) group_left(node_name) node_meta) by (node_name)", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "legendFormat": "OUT {{node_name}}", + "metric": "", + "refId": "B", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Containers Network Traffic by Node", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "columns": [], + "datasource": null, + "fontSize": "100%", + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 57 + }, + "hideTimeOverride": true, + "id": 20, + "links": [], + "pageSize": null, + "scroll": true, + "showHeader": true, + "sort": { + "col": 0, + "desc": true + }, + 
"styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "hidden" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 2, + "pattern": "/.*/", + "thresholds": [], + "type": "number", + "unit": "short" + } + ], + "targets": [ + { + "expr": "sum(node_meta) by (node_id, node_name, instance)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "refId": "A", + "step": 2 + } + ], + "timeFrom": "1s", + "title": "Cluster members", + "transform": "table", + "type": "table" + } + ], + "refresh": "30s", + "schemaVersion": 16, + "style": "dark", + "tags": [ + "swarmprom" + ], + "templating": { + "list": [ + { + "allValue": ".+", + "current": { + "text": "All", + "value": "$__all" + }, + "datasource": "Prometheus", + "hide": 0, + "includeAll": true, + "label": "Swarm Node", + "multi": false, + "name": "node_id", + "options": [], + "query": "node_meta", + "refresh": 1, + "regex": "/node_id=\"([^\"]+)\"/", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "label_values({node_id=\"$tag\"},node_name)", + "tags": [ + "ofdocker", + "ofmon" + ], + "tagsQuery": "label_values(node_meta, node_name)", + "type": "query", + "useTags": true + }, + { + "auto": true, + "auto_count": 30, + "auto_min": "30s", + "current": { + "text": "auto", + "value": "$__auto_interval_interval" + }, + "hide": 0, + "label": "Interval", + "name": "interval", + "options": [ + { + "selected": true, + "text": "auto", + "value": "$__auto_interval_interval" + }, + { + "selected": false, + "text": "1m", + "value": "1m" + }, + { + "selected": false, + "text": "10m", + "value": "10m" + }, + { + "selected": false, + "text": "30m", + "value": "30m" + }, + { + "selected": false, + "text": "1h", + "value": "1h" + }, + { + "selected": false, + "text": "6h", + "value": "6h" + }, + { + "selected": false, + "text": "12h", + "value": "12h" + }, + { + 
"selected": false, + "text": "1d", + "value": "1d" + }, + { + "selected": false, + "text": "7d", + "value": "7d" + }, + { + "selected": false, + "text": "14d", + "value": "14d" + }, + { + "selected": false, + "text": "30d", + "value": "30d" + } + ], + "query": "1m,10m,30m,1h,6h,12h,1d,7d,14d,30d", + "refresh": 2, + "skipUrlSync": false, + "type": "interval" + } + ] + }, + "time": { + "from": "now-15m", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "Docker Swarm Nodes", + "uid": "BPlb-Sgik", + "version": 3 +} diff --git a/stacks/lvl1/swarmprom/grafana/dashboards/swarmprom-prometheus-dash.json b/stacks/lvl1/swarmprom/grafana/dashboards/swarmprom-prometheus-dash.json new file mode 100644 index 0000000..2dc7ca9 --- /dev/null +++ b/stacks/lvl1/swarmprom/grafana/dashboards/swarmprom-prometheus-dash.json @@ -0,0 +1,1248 @@ +{ + "annotations": { + "list": [ + { + "$$hashKey": "object:698", + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 1, + "links": [ + { + "icon": "info", + "tags": [], + "targetBlank": true, + "title": "Grafana Docs", + "tooltip": "", + "type": "link", + "url": "http://docs.grafana.org/" + }, + { + "icon": "info", + "tags": [], + "targetBlank": true, + "title": "Prometheus Docs", + "type": "link", + "url": "http://prometheus.io/docs/introduction/overview/" + } + ], + "panels": [ + { + "aliasColors": { + "prometheus": "#C15C17", + "{instance=\"localhost:9090\",job=\"prometheus\"}": "#CCA300" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "editable": true, + "error": false, + "fill": 
0, + "grid": {}, + "gridPos": { + "h": 5, + "w": 6, + "x": 0, + "y": 0 + }, + "id": 3, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(irate(prometheus_tsdb_head_samples_appended_total{job=\"prometheus\"}[5m]))", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "samples", + "metric": "", + "refId": "A", + "step": 20 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Samples Appended", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "editable": true, + "error": false, + "fill": 0, + "grid": {}, + "gridPos": { + "h": 5, + "w": 6, + "x": 6, + "y": 0 + }, + "id": 14, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "topk(5, max(scrape_duration_seconds) by (job))", + "format": 
"time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{job}}", + "metric": "", + "refId": "A", + "step": 20 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Scrape Duration", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "s", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "description": "", + "fill": 0, + "gridPos": { + "h": 5, + "w": 6, + "x": 12, + "y": 0 + }, + "id": 16, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(process_resident_memory_bytes{job=\"prometheus\"})", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "p8s process resident memory", + "refId": "D", + "step": 20 + }, + { + "expr": "process_virtual_memory_bytes{job=\"prometheus\"}", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "legendFormat": "virtual memory", + "refId": "C", + "step": 20 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Memory Profile", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "transparent": false, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + 
"values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": "", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": true, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": "Prometheus", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 5, + "w": 6, + "x": 18, + "y": 0 + }, + "id": 37, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "prometheus_tsdb_wal_corruptions_total{job=\"prometheus\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 60 + } + ], + "thresholds": "0.1,1", + "title": "WAL Corruptions", + "type": "singlestat", + "valueFontSize": "200%", + "valueMaps": [ + { + "op": "=", + "text": "None", + "value": "0" + } + ], + "valueName": "max" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fill": 0, + "gridPos": { + "h": 5, + "w": 6, + "x": 0, + "y": 5 + }, + "id": 29, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + 
}, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(prometheus_tsdb_head_active_appenders{job=\"prometheus\"})", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "active_appenders", + "metric": "", + "refId": "A", + "step": 20 + }, + { + "expr": "sum(process_open_fds{job=\"prometheus\"})", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "open_fds", + "refId": "B", + "step": 20 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Active Appenders", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { + "prometheus": "#F9BA8F", + "{instance=\"localhost:9090\",interval=\"5s\",job=\"prometheus\"}": "#F9BA8F" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "editable": true, + "error": false, + "fill": 0, + "grid": {}, + "gridPos": { + "h": 5, + "w": 6, + "x": 6, + "y": 5 + }, + "id": 2, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + 
"expr": "prometheus_tsdb_blocks_loaded{job=\"prometheus\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "blocks", + "refId": "A", + "step": 20 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Blocks Loaded", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "decimals": null, + "description": "", + "fill": 0, + "gridPos": { + "h": 5, + "w": 6, + "x": 12, + "y": 5 + }, + "id": 33, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "prometheus_tsdb_head_chunks{job=\"prometheus\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "chunks", + "refId": "A", + "step": 20 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Head Chunks", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "bytes", + "label": "", + "logBase": 1, + "max": null, + 
"min": null, + "show": false + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fill": 1, + "gridPos": { + "h": 5, + "w": 6, + "x": 18, + "y": 5 + }, + "id": 36, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "duration-p99", + "yaxis": 2 + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "prometheus_tsdb_head_gc_duration_seconds{job=\"prometheus\",quantile=\"0.99\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "duration-p99", + "refId": "A", + "step": 20 + }, + { + "expr": "irate(prometheus_tsdb_head_gc_duration_seconds_count{job=\"prometheus\"}[5m])", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "collections", + "refId": "B", + "step": 20 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Head Block GC Activity", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "decimals": null, + "description": "", + "fill": 0, + "gridPos": { + "h": 5, + "w": 8, + "x": 0, + "y": 10 + }, + "id": 20, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": 
false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "duration-p99", + "yaxis": 2 + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(prometheus_tsdb_compaction_duration_bucket{job=\"prometheus\"}[5m])) by (le))", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "duration-{{p99}}", + "refId": "A", + "step": 20 + }, + { + "expr": "irate(prometheus_tsdb_compactions_total{job=\"prometheus\"}[5m])", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "compactions", + "refId": "B", + "step": 20 + }, + { + "expr": "irate(prometheus_tsdb_compactions_failed_total{job=\"prometheus\"}[5m])", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "failed", + "refId": "C", + "step": 20 + }, + { + "expr": "irate(prometheus_tsdb_compactions_triggered_total{job=\"prometheus\"}[5m])", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "triggered", + "refId": "D", + "step": 20 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Compaction Activity", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "s", + "label": "", + "logBase": 1, + "max": null, + "min": "0", + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fill": 1, + "gridPos": { + "h": 5, + "w": 8, + "x": 8, + "y": 10 + }, + "id": 32, + "legend": { + "avg": false, + 
"current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(prometheus_tsdb_reloads_total{job=\"prometheus\"}[5m])", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "reloads", + "refId": "A", + "step": 20 + }, + { + "expr": "rate(prometheus_tsdb_reloads_failures_total{job=\"prometheus\"}[5m])", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "legendFormat": "failures", + "refId": "B", + "step": 20 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Reload Count", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fill": 0, + "gridPos": { + "h": 5, + "w": 8, + "x": 16, + "y": 10 + }, + "id": 38, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "prometheus_engine_query_duration_seconds{job=\"prometheus\", 
quantile=\"0.99\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{slice}}_p99", + "refId": "A", + "step": 20 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Query Durations", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "decimals": null, + "editable": true, + "error": false, + "fill": 0, + "grid": {}, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 15 + }, + "id": 35, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "hideEmpty": true, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "max(prometheus_rule_group_duration_seconds{job=\"prometheus\"}) by (quantile)", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{quantile}}", + "refId": "A", + "step": 10 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Rule Group Eval Duration", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "s", + "label": "", + "logBase": 1, + 
"max": null, + "min": null, + "show": true + }, + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fill": 1, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 15 + }, + "id": 39, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "rate(prometheus_rule_group_iterations_missed_total{job=\"prometheus\"}[5m])", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "missed", + "refId": "B", + "step": 10 + }, + { + "expr": "rate(prometheus_rule_group_iterations_total{job=\"prometheus\"}[5m])", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "iterations", + "refId": "A", + "step": 10 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Rule Group Eval Activity", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "refresh": "1m", + "revision": "1.0", + "schemaVersion": 16, + "style": "dark", + "tags": [ + "prometheus" + ], + "templating": { + "list": [] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "now": true, + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + 
"5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "browser", + "title": "Prometheus 2.0 Stats", + "uid": "mGFfYSRiz", + "version": 1 +} diff --git a/stacks/lvl1/swarmprom/grafana/dashboards/swarmprom-services-dash.json b/stacks/lvl1/swarmprom/grafana/dashboards/swarmprom-services-dash.json new file mode 100644 index 0000000..cacbf00 --- /dev/null +++ b/stacks/lvl1/swarmprom/grafana/dashboards/swarmprom-services-dash.json @@ -0,0 +1,1885 @@ +{ + "annotations": { + "list": [ + { + "$$hashKey": "object:429", + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "description": "Docker Swarm stacks and services metrics", + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "iteration": 1520585594614, + "links": [], + "panels": [ + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": null, + "decimals": 0, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 4, + "w": 6, + "x": 0, + "y": 0 + }, + "hideTimeOverride": true, + "id": 1, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 
193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "count(count(container_tasks_state{container_label_com_docker_swarm_node_id =~\"$node_id\"}) by (container_label_com_docker_swarm_node_id))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 2 + } + ], + "thresholds": "", + "timeFrom": "1m", + "title": "Nodes", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": null, + "decimals": 0, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 4, + "w": 6, + "x": 6, + "y": 0 + }, + "hideTimeOverride": true, + "id": 21, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "count(count(container_tasks_state{container_label_com_docker_stack_namespace=~\".+\", container_label_com_docker_swarm_node_id=~\"$node_id\"}) by (container_label_com_docker_stack_namespace))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 2 + } + ], + "thresholds": "", + "timeFrom": "1m", + "title": "Stacks", + "type": 
"singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": null, + "decimals": 0, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 4, + "w": 6, + "x": 12, + "y": 0 + }, + "hideTimeOverride": true, + "id": 20, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "count(count(container_tasks_state{container_label_com_docker_swarm_service_name=~\".+\", container_label_com_docker_swarm_node_id=~\"$node_id\"}) by (container_label_com_docker_swarm_service_name))", + "format": "time_series", + "intervalFactor": 2, + "refId": "A", + "step": 2 + } + ], + "thresholds": "", + "timeFrom": "1m", + "timeShift": null, + "title": "Services", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": null, + "decimals": 0, + "format": "none", + "gauge": { 
+ "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 4, + "w": 6, + "x": 18, + "y": 0 + }, + "hideTimeOverride": true, + "id": 7, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "count(rate(container_last_seen{container_label_com_docker_swarm_node_id=~\"$node_id\"}[5m])) ", + "format": "time_series", + "intervalFactor": 2, + "refId": "A", + "step": 2 + } + ], + "thresholds": "", + "timeFrom": "1m", + "title": "Containers", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + }, + { + "aliasColors": {}, + "bars": true, + "dashLength": 10, + "dashes": false, + "datasource": null, + "decimals": 0, + "fill": 5, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 4 + }, + "id": 12, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": false, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": 
"sum(rate(container_last_seen{container_label_com_docker_swarm_node_id=~\"$node_id\"}[5m])) by (container_label_com_docker_swarm_service_name)", + "format": "time_series", + "intervalFactor": 10, + "legendFormat": "{{ container_label_com_docker_swarm_service_name }}", + "refId": "A", + "step": 10 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Service Tasks", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "decimals": 0, + "fill": 1, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 4 + }, + "id": 32, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "rightSide": true, + "show": false, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(increase(engine_daemon_health_checks_total[$interval]) * on(instance) group_left(node_id) swarm_node_info{node_id=~\"$node_id\"}) ", + "format": "time_series", + "intervalFactor": 10, + "legendFormat": "checks", + "refId": "A", + "step": 10 + }, + { + "expr": "sum(increase(engine_daemon_health_checks_failed_total[$interval]) * on(instance) group_left(node_id) swarm_node_info{node_id=~\"$node_id\"}) ", + 
"format": "time_series", + "intervalFactor": 10, + "legendFormat": "failed", + "refId": "B", + "step": 10 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Health Checks", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "decimals": 2, + "fill": 1, + "gridPos": { + "h": 7, + "w": 20, + "x": 0, + "y": 11 + }, + "id": 22, + "legend": { + "alignAsTable": true, + "avg": true, + "current": false, + "hideEmpty": true, + "hideZero": true, + "max": true, + "min": true, + "rightSide": true, + "show": true, + "sort": "avg", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(irate(container_cpu_usage_seconds_total{container_label_com_docker_swarm_node_id=~\"$node_id\", id=~\"/docker/.*\"}[1m])) by (container_label_com_docker_swarm_service_name) * 100 ", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{container_label_com_docker_swarm_service_name}}", + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "CPU usage by Service", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": 
true, + "values": [] + }, + "yaxes": [ + { + "format": "percent", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": null, + "decimals": null, + "format": "percent", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": true, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 7, + "w": 4, + "x": 20, + "y": 11 + }, + "hideTimeOverride": true, + "id": 11, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(irate(node_cpu_seconds_total{mode=\"idle\"}[$interval]) * on(instance) group_left(node_name) node_meta{node_id=~\"$node_id\"}) * 100 / count(node_cpu_seconds_total{mode=\"user\"} * on(instance) group_left(node_name) node_meta{node_id=~\"$node_id\"}) ", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 2 + } + ], + "thresholds": "10,25,100", + "timeFrom": "1m", + "timeShift": null, + "title": "CPU Idle", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + }, + { + "aliasColors": {}, + "bars": 
false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "decimals": 2, + "fill": 1, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 18 + }, + "id": 33, + "legend": { + "alignAsTable": true, + "avg": true, + "current": false, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sort": "avg", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "topk(10, sum(irate(container_cpu_usage_seconds_total{container_label_com_docker_swarm_node_id=~\"$node_id\", id=~\"/docker/.*\"}[$interval])) by (name)) * 100 ", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{name}}", + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "CPU usage by Container (top 10)", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "percent", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "gridPos": { + "h": 7, + "w": 20, + "x": 0, + "y": 25 + }, + "id": 24, + "legend": { + "alignAsTable": true, + "avg": true, + "current": false, + "max": true, + "min": true, + "rightSide": true, + "show": true, + "sort": "avg", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": 
[], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(container_memory_usage_bytes{container_label_com_docker_swarm_node_id=~\"$node_id\", id=~\"/docker/.*\"}) by (container_label_com_docker_swarm_service_name) ", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Used {{container_label_com_docker_swarm_service_name}}", + "refId": "A", + "step": 2 + }, + { + "expr": "sum(container_memory_cache{container_label_com_docker_swarm_node_id=~\"$node_id\", id=~\"/docker/.*\"}) by (container_label_com_docker_swarm_service_name) ", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Cached {{container_label_com_docker_swarm_service_name}}", + "refId": "B", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Memory usage by Service", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "decbytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": null, + "format": "percent", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": true, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 7, + "w": 4, + "x": 20, + "y": 25 + }, + "id": 8, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": 
"range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum((node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes) * on(instance) group_left(node_name) node_meta{node_id=~\"$node_id\"} * 100) / count(node_meta * on(instance) group_left(node_name) node_meta{node_id=~\"$node_id\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 20 + } + ], + "thresholds": "10,25,100", + "title": "Available Memory", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 32 + }, + "id": 34, + "legend": { + "alignAsTable": true, + "avg": true, + "current": false, + "hideEmpty": false, + "hideZero": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sort": "avg", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "topk(10, avg_over_time(container_memory_usage_bytes{container_label_com_docker_swarm_node_id=~\"$node_id\", id=~\"/docker/.*\"}[$interval]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{name}}", + "refId": "A", + 
"step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Memory usage by Container (top 10)", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "decbytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 39 + }, + "id": 17, + "legend": { + "alignAsTable": true, + "avg": true, + "current": false, + "max": true, + "min": true, + "rightSide": true, + "show": true, + "sort": "avg", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(container_network_receive_bytes_total{container_label_com_docker_swarm_node_id=~\"$node_id\"}[$interval])) by (container_label_com_docker_swarm_service_name)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{container_label_com_docker_swarm_service_name}}", + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Network received by Service", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": 
true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 46 + }, + "id": 25, + "legend": { + "alignAsTable": true, + "avg": true, + "current": false, + "max": true, + "min": true, + "rightSide": true, + "show": true, + "sort": "avg", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(container_network_transmit_bytes_total{container_label_com_docker_swarm_node_id=~\"$node_id\"}[$interval])) by (container_label_com_docker_swarm_service_name)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{container_label_com_docker_swarm_service_name}}", + "metric": "", + "refId": "B", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Network transmitted by Service", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "gridPos": { + "h": 7, + "w": 10, + "x": 0, + "y": 53 + }, + "id": 31, + "legend": { + "avg": true, + "current": false, + "max": false, + "min": false, + "show": true, + "total": 
false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(container_network_receive_bytes_total{id=\"/\"}[$interval])) by (id)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Received", + "refId": "A", + "step": 4 + }, + { + "expr": "- sum(rate(container_network_transmit_bytes_total{id=\"/\"}[$interval])) by (id)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Transmited", + "refId": "B", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Cluster Network Traffic", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "gridPos": { + "h": 7, + "w": 10, + "x": 10, + "y": 53 + }, + "id": 26, + "legend": { + "alignAsTable": false, + "avg": true, + "current": false, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(irate(container_fs_reads_total[$interval]) )", + "format": "time_series", + 
"intervalFactor": 2, + "legendFormat": "Reads", + "refId": "A", + "step": 4 + }, + { + "expr": "sum(irate(container_fs_writes_total[$interval])) ", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Writes ", + "refId": "B", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Cluster IOPS", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": null, + "format": "percent", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": true, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 7, + "w": 4, + "x": 20, + "y": 53 + }, + "id": 27, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum((node_filesystem_free_bytes{mountpoint=\"/rootfs\"} / node_filesystem_size_bytes{mountpoint=\"/rootfs\"}) * on(instance) group_left(node_name) 
node_meta{node_id=~\"$node_id\"} * 100) / count(node_meta * on(instance) group_left(node_name) node_meta{node_id=~\"$node_id\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 20 + } + ], + "thresholds": "10,25,100", + "title": "Available Disk Space", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "decimals": 0, + "fill": 1, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 60 + }, + "id": 29, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(engine_daemon_container_actions_seconds_count * on(instance) group_left(node_id) swarm_node_info{node_id=~\"$node_id\"}) by (action)", + "format": "time_series", + "intervalFactor": 10, + "legendFormat": "{{action }}", + "refId": "A", + "step": 10 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Docker Daemon Container Actions", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": 
true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "decimals": 0, + "fill": 1, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 60 + }, + "id": 30, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(engine_daemon_network_actions_seconds_count * on(instance) group_left(node_id) swarm_node_info{node_id=~\"$node_id\"}) by (action)", + "format": "time_series", + "intervalFactor": 10, + "legendFormat": "{{action }}", + "refId": "A", + "step": 10 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Docker Daemon Network Actions", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "columns": [ + { + "text": "Avg", + "value": "avg" + } + ], + "datasource": null, + "fontSize": "100%", + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 67 + }, + "hideTimeOverride": true, + "id": 28, + "links": [], + "pageSize": null, + "repeat": null, + "scroll": true, + "showHeader": true, + "sort": { + "col": 0, + "desc": true + }, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + 
"pattern": "Time", + "type": "hidden" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 2, + "pattern": "/.*/", + "thresholds": [], + "type": "number", + "unit": "short" + } + ], + "targets": [ + { + "expr": "sum(engine_daemon_engine_info * on(instance) group_left(node_id) swarm_node_info) by (kernel, os, graphdriver, version, node_id)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 2 + } + ], + "timeFrom": "1s", + "title": "Docker Engine Info", + "transform": "timeseries_to_rows", + "type": "table" + } + ], + "refresh": "30s", + "schemaVersion": 16, + "style": "dark", + "tags": [ + "swarmprom" + ], + "templating": { + "list": [ + { + "allValue": ".+", + "current": { + "text": "All", + "value": "$__all" + }, + "datasource": "Prometheus", + "hide": 0, + "includeAll": true, + "label": "Swarm Node", + "multi": false, + "name": "node_id", + "options": [], + "query": "node_meta", + "refresh": 2, + "regex": "/node_id=\"([^\"]+)\"/", + "sort": 0, + "tagValuesQuery": "label_values({node_id=\"$tag\"},node_name)", + "tags": [ + "ofdocker", + "ofmon" + ], + "tagsQuery": "label_values(node_meta, node_name)", + "type": "query", + "useTags": true + }, + { + "auto": true, + "auto_count": 30, + "auto_min": "30s", + "current": { + "text": "auto", + "value": "$__auto_interval_interval" + }, + "hide": 0, + "label": "Interval", + "name": "interval", + "options": [ + { + "selected": true, + "text": "auto", + "value": "$__auto_interval_interval" + }, + { + "selected": false, + "text": "1m", + "value": "1m" + }, + { + "selected": false, + "text": "10m", + "value": "10m" + }, + { + "selected": false, + "text": "30m", + "value": "30m" + }, + { + "selected": false, + "text": "1h", + "value": "1h" + }, + { + "selected": false, + "text": "6h", + "value": "6h" + }, + { + "selected": false, + "text": "12h", + "value": 
"12h" + }, + { + "selected": false, + "text": "1d", + "value": "1d" + }, + { + "selected": false, + "text": "7d", + "value": "7d" + }, + { + "selected": false, + "text": "14d", + "value": "14d" + }, + { + "selected": false, + "text": "30d", + "value": "30d" + } + ], + "query": "1m,10m,30m,1h,6h,12h,1d,7d,14d,30d", + "refresh": 2, + "type": "interval" + } + ] + }, + "time": { + "from": "now-15m", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "Docker Swarm Services", + "uid": "zr_baSRmk", + "version": 1 +} diff --git a/stacks/lvl1/swarmprom/grafana/datasources/prometheus.yaml b/stacks/lvl1/swarmprom/grafana/datasources/prometheus.yaml new file mode 100644 index 0000000..31a8607 --- /dev/null +++ b/stacks/lvl1/swarmprom/grafana/datasources/prometheus.yaml @@ -0,0 +1,13 @@ +apiVersion: 1 + +deleteDatasources: + - name: Prometheus + +datasources: +- name: Prometheus + type: prometheus + access: proxy + url: http://prometheus:9090 + isDefault: true + version: 1 + editable: true diff --git a/stacks/lvl1/swarmprom/grafana/screens/alertmanager-slack-v2.png b/stacks/lvl1/swarmprom/grafana/screens/alertmanager-slack-v2.png new file mode 100644 index 0000000..79ab18d Binary files /dev/null and b/stacks/lvl1/swarmprom/grafana/screens/alertmanager-slack-v2.png differ diff --git a/stacks/lvl1/swarmprom/grafana/screens/swarmprom-nodes-dash-v3.png b/stacks/lvl1/swarmprom/grafana/screens/swarmprom-nodes-dash-v3.png new file mode 100644 index 0000000..eb8a24a Binary files /dev/null and b/stacks/lvl1/swarmprom/grafana/screens/swarmprom-nodes-dash-v3.png differ diff --git a/stacks/lvl1/swarmprom/grafana/screens/swarmprom-prometheus-dash-v3.png b/stacks/lvl1/swarmprom/grafana/screens/swarmprom-prometheus-dash-v3.png new file mode 100755 index 
0000000..a9e1f73 Binary files /dev/null and b/stacks/lvl1/swarmprom/grafana/screens/swarmprom-prometheus-dash-v3.png differ diff --git a/stacks/lvl1/swarmprom/grafana/screens/swarmprom-services-dash-v3.png b/stacks/lvl1/swarmprom/grafana/screens/swarmprom-services-dash-v3.png new file mode 100644 index 0000000..9f921ae Binary files /dev/null and b/stacks/lvl1/swarmprom/grafana/screens/swarmprom-services-dash-v3.png differ diff --git a/stacks/lvl1/swarmprom/grafana/screens/unsee.png b/stacks/lvl1/swarmprom/grafana/screens/unsee.png new file mode 100755 index 0000000..06563f3 Binary files /dev/null and b/stacks/lvl1/swarmprom/grafana/screens/unsee.png differ diff --git a/stacks/lvl1/swarmprom/grafana/screens/weave-scope-hosts-v2.png b/stacks/lvl1/swarmprom/grafana/screens/weave-scope-hosts-v2.png new file mode 100755 index 0000000..b3b19d1 Binary files /dev/null and b/stacks/lvl1/swarmprom/grafana/screens/weave-scope-hosts-v2.png differ diff --git a/stacks/lvl1/swarmprom/grafana/screens/weave-scope.png b/stacks/lvl1/swarmprom/grafana/screens/weave-scope.png new file mode 100644 index 0000000..51c9b37 Binary files /dev/null and b/stacks/lvl1/swarmprom/grafana/screens/weave-scope.png differ diff --git a/stacks/lvl1/swarmprom/grafana/swarmprom_dashboards.yml b/stacks/lvl1/swarmprom/grafana/swarmprom_dashboards.yml new file mode 100644 index 0000000..ee18e35 --- /dev/null +++ b/stacks/lvl1/swarmprom/grafana/swarmprom_dashboards.yml @@ -0,0 +1,11 @@ +apiVersion: 1 + +providers: +- name: 'default' + orgId: 1 + folder: '' + type: file + disableDeletion: false + editable: true + options: + path: /etc/grafana/dashboards diff --git a/stacks/lvl1/swarmprom/node-exporter/Dockerfile b/stacks/lvl1/swarmprom/node-exporter/Dockerfile new file mode 100644 index 0000000..dd3b4b6 --- /dev/null +++ b/stacks/lvl1/swarmprom/node-exporter/Dockerfile @@ -0,0 +1,10 @@ +FROM prom/node-exporter:v0.16.0 + +ENV NODE_ID=none + +USER root + +COPY conf /etc/node-exporter/ + +ENTRYPOINT [ 
"/etc/node-exporter/docker-entrypoint.sh" ] +CMD [ "/bin/node_exporter" ] diff --git a/stacks/lvl1/swarmprom/node-exporter/conf/docker-entrypoint.sh b/stacks/lvl1/swarmprom/node-exporter/conf/docker-entrypoint.sh new file mode 100755 index 0000000..db422f4 --- /dev/null +++ b/stacks/lvl1/swarmprom/node-exporter/conf/docker-entrypoint.sh @@ -0,0 +1,8 @@ +#!/bin/sh -e + +NODE_NAME=$(cat /etc/nodename) +echo "node_meta{node_id=\"$NODE_ID\", container_label_com_docker_swarm_node_id=\"$NODE_ID\", node_name=\"$NODE_NAME\"} 1" > /etc/node-exporter/node-meta.prom + +set -- /bin/node_exporter "$@" + +exec "$@" diff --git a/stacks/lvl1/swarmprom/prometheus/Dockerfile b/stacks/lvl1/swarmprom/prometheus/Dockerfile new file mode 100644 index 0000000..cb31a08 --- /dev/null +++ b/stacks/lvl1/swarmprom/prometheus/Dockerfile @@ -0,0 +1,10 @@ +FROM prom/prometheus:v2.5.0 +# https://hub.docker.com/r/prom/prometheus/tags/ + +ENV WEAVE_TOKEN=none + +COPY conf /etc/prometheus/ + +ENTRYPOINT [ "/etc/prometheus/docker-entrypoint.sh" ] +CMD [ "--config.file=/etc/prometheus/prometheus.yml", \ + "--storage.tsdb.path=/prometheus" ] diff --git a/stacks/lvl1/swarmprom/prometheus/conf/docker-entrypoint.sh b/stacks/lvl1/swarmprom/prometheus/conf/docker-entrypoint.sh new file mode 100755 index 0000000..3acd9f3 --- /dev/null +++ b/stacks/lvl1/swarmprom/prometheus/conf/docker-entrypoint.sh @@ -0,0 +1,48 @@ +#!/bin/sh -e + +cat /etc/prometheus/prometheus.yml > /tmp/prometheus.yml +cat /etc/prometheus/weave-cortex.yml | \ + sed "s@#password: #@password: '$WEAVE_TOKEN'@g" > /tmp/weave-cortex.yml + +#JOBS=mongo-exporter:9111 redis-exporter:9112 + +if [ ${JOBS+x} ]; then + +for job in $JOBS +do +echo "adding job $job" + +SERVICE=$(echo "$job" | cut -d":" -f1) +PORT=$(echo "$job" | cut -d":" -f2) + +cat >>/tmp/prometheus.yml <>/tmp/weave-cortex.yml <# + +global: + scrape_interval: 15s + evaluation_interval: 15s + + external_labels: + monitor: 'promswarm' + +scrape_configs: + - job_name: 'prometheus' + 
static_configs: + - targets: ['localhost:9090'] + + - job_name: 'dockerd-exporter' + dns_sd_configs: + - names: + - 'tasks.dockerd-exporter' + type: 'A' + port: 9323 + + - job_name: 'cadvisor' + dns_sd_configs: + - names: + - 'tasks.cadvisor' + type: 'A' + port: 8080 + + - job_name: 'node-exporter' + dns_sd_configs: + - names: + - 'tasks.node-exporter' + type: 'A' + port: 9100 diff --git a/stacks/lvl1/swarmprom/prometheus/rules/swarm_node.rules.yml b/stacks/lvl1/swarmprom/prometheus/rules/swarm_node.rules.yml new file mode 100644 index 0000000..5b0eaaf --- /dev/null +++ b/stacks/lvl1/swarmprom/prometheus/rules/swarm_node.rules.yml @@ -0,0 +1,44 @@ +groups: +- name: /1/store/projects/vagrant/docker-swarm-vagrant/apps/swarmprom/prometheus/rules/swarm_node.rules.yml + rules: + - alert: node_cpu_usage + expr: 100 - (avg(irate(node_cpu_seconds_total{mode="idle"}[1m]) * ON(instance) GROUP_LEFT(node_name) + node_meta * 100) BY (node_name)) > 50 + for: 1m + labels: + severity: warning + annotations: + description: Swarm node {{ $labels.node_name }} CPU usage is at {{ humanize + $value}}%. + summary: CPU alert for Swarm node '{{ $labels.node_name }}' + - alert: node_memory_usage + expr: sum(((node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) / node_memory_MemTotal_bytes) + * ON(instance) GROUP_LEFT(node_name) node_meta * 100) BY (node_name) > 80 + for: 1m + labels: + severity: warning + annotations: + description: Swarm node {{ $labels.node_name }} memory usage is at {{ humanize + $value}}%. + summary: Memory alert for Swarm node '{{ $labels.node_name }}' + - alert: node_disk_usage + expr: ((node_filesystem_size_bytes{mountpoint="/rootfs"} - node_filesystem_free_bytes{mountpoint="/rootfs"}) + * 100 / node_filesystem_size_bytes{mountpoint="/rootfs"}) * ON(instance) GROUP_LEFT(node_name) + node_meta > 85 + for: 1m + labels: + severity: warning + annotations: + description: Swarm node {{ $labels.node_name }} disk usage is at {{ humanize + $value}}%. 
+ summary: Disk alert for Swarm node '{{ $labels.node_name }}' + - alert: node_disk_fill_rate_6h + expr: predict_linear(node_filesystem_free_bytes{mountpoint="/rootfs"}[1h], 6 * 3600) * ON(instance) + GROUP_LEFT(node_name) node_meta < 0 + for: 1h + labels: + severity: critical + annotations: + description: Swarm node {{ $labels.node_name }} disk is going to fill up in + 6h. + summary: Disk fill alert for Swarm node '{{ $labels.node_name }}' diff --git a/stacks/lvl1/swarmprom/prometheus/rules/swarm_task.rules.yml b/stacks/lvl1/swarmprom/prometheus/rules/swarm_task.rules.yml new file mode 100644 index 0000000..db9aa7e --- /dev/null +++ b/stacks/lvl1/swarmprom/prometheus/rules/swarm_task.rules.yml @@ -0,0 +1,24 @@ +groups: +- name: /1/store/projects/vagrant/docker-swarm-vagrant/apps/swarmprom/prometheus/rules/swarm_task.rules.yml + rules: + - alert: task_high_cpu_usage_50 + expr: sum(rate(container_cpu_usage_seconds_total{container_label_com_docker_swarm_task_name=~".+"}[1m])) + BY (container_label_com_docker_swarm_task_name, container_label_com_docker_swarm_node_id) + * 100 > 50 + for: 1m + annotations: + description: '{{ $labels.container_label_com_docker_swarm_task_name }} on ''{{ + $labels.container_label_com_docker_swarm_node_id }}'' CPU usage is at {{ humanize + $value}}%.' + summary: CPU alert for Swarm task '{{ $labels.container_label_com_docker_swarm_task_name + }}' on '{{ $labels.container_label_com_docker_swarm_node_id }}' + - alert: task_high_memory_usage_1g + expr: sum(container_memory_rss{container_label_com_docker_swarm_task_name=~".+"}) + BY (container_label_com_docker_swarm_task_name, container_label_com_docker_swarm_node_id) > 1e+09 + for: 1m + annotations: + description: '{{ $labels.container_label_com_docker_swarm_task_name }} on ''{{ + $labels.container_label_com_docker_swarm_node_id }}'' memory usage is {{ humanize + $value}}.' 
+ summary: Memory alert for Swarm task '{{ $labels.container_label_com_docker_swarm_task_name + }}' on '{{ $labels.container_label_com_docker_swarm_node_id }}' diff --git a/stacks/lvl1/swarmprom/test-compose.yml b/stacks/lvl1/swarmprom/test-compose.yml new file mode 100644 index 0000000..764467e --- /dev/null +++ b/stacks/lvl1/swarmprom/test-compose.yml @@ -0,0 +1,37 @@ +version: "3.3" + +networks: + net: + driver: overlay + attachable: true + mon_net: + external: true + +services: + + mongo: + image: healthcheck/mongo:latest + networks: + - net + deploy: + mode: replicated + replicas: 1 + placement: + constraints: + - node.role != manager + + mongo-exporter: + image: forekshub/percona-mongodb-exporter:latest + networks: + - net + - mon_net + ports: + - "9216:9216" + environment: + - MONGODB_URL=mongodb://mongo:27017 + deploy: + mode: replicated + replicas: 1 + placement: + constraints: + - node.role == manager diff --git a/stacks/lvl1/swarmprom/weave-compose.yml b/stacks/lvl1/swarmprom/weave-compose.yml new file mode 100644 index 0000000..c91509c --- /dev/null +++ b/stacks/lvl1/swarmprom/weave-compose.yml @@ -0,0 +1,144 @@ +version: "3.3" + +networks: + net: + driver: overlay + attachable: true + +volumes: + prometheus: {} + grafana: {} + +configs: + caddy_config: + file: ./caddy/Caddyfile + dockerd_config: + file: ./dockerd-exporter/Caddyfile + +services: + dockerd-exporter: + image: stefanprodan/caddy + networks: + - net + environment: + - DOCKER_GWBRIDGE_IP=172.18.0.1 + configs: + - source: dockerd_config + target: /etc/caddy/Caddyfile + deploy: + mode: global + + cadvisor: + image: google/cadvisor + networks: + - net + command: -logtostderr -docker_only + volumes: + - /var/run/docker.sock:/var/run/docker.sock:ro + - /:/rootfs:ro + - /var/run:/var/run + - /sys:/sys:ro + - /var/lib/docker/:/var/lib/docker:ro + deploy: + mode: global + + grafana: + image: stefanprodan/swarmprom-grafana:4.6.3 + networks: + - net + environment: + - 
GF_SECURITY_ADMIN_USER=${ADMIN_USER:-admin} + - GF_SECURITY_ADMIN_PASSWORD=${ADMIN_PASSWORD:-admin} + - GF_USERS_ALLOW_SIGN_UP=false + #- GF_SERVER_ROOT_URL=${GF_SERVER_ROOT_URL:-localhost} + #- GF_SMTP_ENABLED=${GF_SMTP_ENABLED:-false} + #- GF_SMTP_FROM_ADDRESS=${GF_SMTP_FROM_ADDRESS:-grafana@test.com} + #- GF_SMTP_FROM_NAME=${GF_SMTP_FROM_NAME:-Grafana} + #- GF_SMTP_HOST=${GF_SMTP_HOST:-smtp:25} + #- GF_SMTP_USER=${GF_SMTP_USER} + #- GF_SMTP_PASSWORD=${GF_SMTP_PASSWORD} + volumes: + - grafana:/var/lib/grafana + deploy: + mode: replicated + replicas: 1 + placement: + constraints: + - node.role == manager + + node-exporter: + image: stefanprodan/swarmprom-node-exporter:v0.15.2 + networks: + - net + environment: + - NODE_ID={{.Node.ID}} + volumes: + - /proc:/host/proc:ro + - /sys:/host/sys:ro + - /:/rootfs:ro + - /etc/hostname:/etc/nodename + command: + - '--path.sysfs=/host/sys' + - '--path.procfs=/host/proc' + - '--collector.textfile.directory=/etc/node-exporter/' + - '--collector.filesystem.ignored-mount-points=^/(sys|proc|dev|host|etc)($$|/)' + # no collectors are explicitely enabled here, because the defaults are just fine, + # see https://github.com/prometheus/node_exporter + # disable ipvs collector because it barfs the node-exporter logs full with errors on my centos 7 vm's + - '--no-collector.ipvs' + deploy: + mode: global + + caddy: + image: stefanprodan/caddy + ports: + - "3000:3000" + - "9090:9090" + networks: + - net + environment: + - ADMIN_USER=${ADMIN_USER:-admin} + - ADMIN_PASSWORD=${ADMIN_PASSWORD:-admin} + configs: + - source: caddy_config + target: /etc/caddy/Caddyfile + deploy: + mode: replicated + replicas: 1 + placement: + constraints: + - node.role == manager + + prometheus: + image: stefanprodan/swarmprom-prometheus:v2.2.0-rc.0 + networks: + - net + environment: + - WEAVE_TOKEN=$TOKEN + #- JOBS=mongo-exporter:9216 + command: + - '--config.file=/etc/prometheus/weave-cortex.yml' + - '--web.console.libraries=/etc/prometheus/console_libraries' + 
- '--web.console.templates=/etc/prometheus/consoles' + - '--storage.tsdb.path=/prometheus' + - '--storage.tsdb.retention=${PROMETHEUS_RETENTION:-24h}' + volumes: + - prometheus:/prometheus + deploy: + mode: replicated + replicas: 1 + placement: + constraints: + - node.role == manager + + scope-launcher: + image: weaveworks/scope-swarm-launcher + networks: + - net + command: scope launch --service-token=$TOKEN + volumes: + - /var/run/docker.sock:/var/run/docker.sock + deploy: + mode: global + restart_policy: + condition: none diff --git a/stacks/lvl2/.DS_Store b/stacks/lvl2/.DS_Store new file mode 100644 index 0000000..6aecb1a Binary files /dev/null and b/stacks/lvl2/.DS_Store differ diff --git a/stacks/lvl2/mongo/stack.yml b/stacks/lvl2/mongo/stack.yml new file mode 100644 index 0000000..d364f20 --- /dev/null +++ b/stacks/lvl2/mongo/stack.yml @@ -0,0 +1,108 @@ +version: "3.7" + +networks: + mother: + external: true + + + + +services: + mongoprimary: + image: registry.vnfco.ir/library/bitnami/mongodb:5.0-debian-10 + volumes: + - /volume/mongo/primary:/data/db + environment: + - MONGODB_ADVERTISED_HOSTNAME=mongoprimary + - MONGODB_REPLICA_SET_MODE=primary + - MONGODB_ROOT_PASSWORD=cWv1WQvWEp+LPdax9We/M6PeT7KgyTE9zKmC5y1ieqz4JtpLiVifJrQg0VHYhE6l + - MONGODB_REPLICA_SET_KEY=gp39MND7udY6bt9V3h9u+dhxToop4WZTZ2Umn810snSYurlSIPqs/oy0YfjftsBz + deploy: + update_config: + parallelism: 1 + delay: 10s + order: start-first + restart_policy: + condition: any + delay: 5s + max_attempts: 3 + window: 120s + resources: + limits: + cpus: '0.25' + memory: 300M + replicas: 1 + mode: replicated + placement: + constraints: + - node.labels.mongo.replica == 1 + networks: + - mother + + + mongosecondary: + image: registry.vnfco.ir/library/bitnami/mongodb:5.0-debian-10 + environment: + - MONGODB_REPLICA_SET_MODE=secondary + - MONGODB_INITIAL_PRIMARY_HOST=mongoprimary + - MONGODB_INITIAL_PRIMARY_PORT_NUMBER=27017 + - 
MONGODB_INITIAL_PRIMARY_ROOT_PASSWORD=cWv1WQvWEp+LPdax9We/M6PeT7KgyTE9zKmC5y1ieqz4JtpLiVifJrQg0VHYhE6l + - MONGODB_REPLICA_SET_KEY=gp39MND7udY6bt9V3h9u+dhxToop4WZTZ2Umn810snSYurlSIPqs/oy0YfjftsBz + deploy: + update_config: + parallelism: 1 + delay: 10s + order: start-first + restart_policy: + condition: any + delay: 5s + max_attempts: 3 + window: 120s + resources: + limits: + cpus: '0.25' + memory: 300M + mode: replicated + replicas: 1 + placement: + constraints: + - node.labels.mongo.replica == 2 + port: + - "27011:27017" + networks: + - mother + + mongoarbiter: + image: registry.vnfco.ir/library/bitnami/mongodb:5.0-debian-10 + environment: + - MONGODB_REPLICA_SET_MODE=arbiter + - MONGODB_INITIAL_PRIMARY_HOST=mongoprimary + - MONGODB_INITIAL_PRIMARY_PORT_NUMBER=27017 + - MONGODB_INITIAL_PRIMARY_ROOT_PASSWORD=cWv1WQvWEp+LPdax9We/M6PeT7KgyTE9zKmC5y1ieqz4JtpLiVifJrQg0VHYhE6l + - MONGODB_REPLICA_SET_KEY=gp39MND7udY6bt9V3h9u+dhxToop4WZTZ2Umn810snSYurlSIPqs/oy0YfjftsBz + deploy: + update_config: + parallelism: 1 + delay: 10s + order: start-first + restart_policy: + condition: any + delay: 5s + max_attempts: 3 + window: 120s + resources: + limits: + cpus: '0.2' + memory: 100M + mode: replicated + replicas: 0 + placement: + constraints: + - node.labels.mongo.replica == 2 + port: + - "27012:27017" + networks: + - mother + + + diff --git a/stacks/lvl2/pg/stack.yml b/stacks/lvl2/pg/stack.yml new file mode 100644 index 0000000..d48d811 --- /dev/null +++ b/stacks/lvl2/pg/stack.yml @@ -0,0 +1,85 @@ + +version: "3.7" + +networks: + mother: + external: true + + + + +services: + pgmaster: + image: registry.vnfco.ir/library/bitnami/postgresql:13 + volumes: + - '/volume/pg:/bitnami/postgresql' + environment: + - POSTGRESQL_USERNAME=igarsonAgent + - POSTGRESQL_PASSWORD=xHTpBf4wC+bBeNg2pL6Ga7VEWKFJx7VPEUpqxwPFfOc2YYTVwFQuHfsiqoVeT9+6 + - POSTGRESQL_DATABASE=igarsonDB + - POSTGRESQL_REPLICATION_MODE=master + - POSTGRESQL_REPLICATION_USER=pgreplicator + - 
POSTGRESQL_REPLICATION_PASSWORD=ciXz6xmnEMZSO+0T8L6mGcFJrAvPzkTC04oh/WYIRi51gMQLPfW8tTEHALX6fhk4 + deploy: + update_config: + parallelism: 1 + delay: 10s + order: start-first + restart_policy: + condition: any + delay: 5s + max_attempts: 3 + window: 120s + resources: + limits: + cpus: '0.7' + memory: 500M + replicas: 1 + mode: replicated + placement: + constraints: + - node.labels.mongo.replica == 1 + ports: + - 5432 + networks: + - mother + + + pgslave1: + image: registry.vnfco.ir/library/bitnami/postgresql:13 + environment: + - POSTGRESQL_REPLICATION_MODE=slave + - POSTGRESQL_REPLICATION_USER=pgreplicator + - POSTGRESQL_REPLICATION_PASSWORD=ciXz6xmnEMZSO+0T8L6mGcFJrAvPzkTC04oh/WYIRi51gMQLPfW8tTEHALX6fhk4 + - POSTGRESQL_MASTER_HOST=pgmaster + - POSTGRESQL_PASSWORD=xHTpBf4wC+bBeNg2pL6Ga7VEWKFJx7VPEUpqxwPFfOc2YYTVwFQuHfsiqoVeT9+6 + - POSTGRESQL_MASTER_PORT_NUMBER=5432 + + deploy: + update_config: + parallelism: 1 + delay: 10s + order: start-first + restart_policy: + condition: any + delay: 5s + max_attempts: 3 + window: 120s + resources: + limits: + cpus: '0.3' + memory: 300M + mode: replicated + replicas: 1 + placement: + constraints: + - node.labels.mongo.replica == 2 + ports: + - 5432 + networks: + - mother + + + + + diff --git a/stacks/lvl2/rabbitmq/stack.yml b/stacks/lvl2/rabbitmq/stack.yml new file mode 100644 index 0000000..90740b3 --- /dev/null +++ b/stacks/lvl2/rabbitmq/stack.yml @@ -0,0 +1,39 @@ + +version: "3.7" + +networks: + mother: + external: true + +services: + rabbitmq: + image: registry.vnfco.ir/library/bitnami/rabbitmq:latest + ports: + - '5672' + environment: + - RABBITMQ_PASSWORD=muyXH/zymcTYLzk3wYnIwG+UJWECKy0ViUDO+UlCmPF3XS+2kliuV0TaA0mWf6lT + - RABBITMQ_USERNAME=igarsonAgent + volumes: + - '/volume/rabbit:/bitnami' + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:5672"] + interval: 10s + timeout: 3s + retries: 5 + deploy: + update_config: + parallelism: 1 + delay: 10s + order: start-first + restart_policy: + condition: any + 
delay: 5s + window: 120s + resources: + limits: + cpus: '0.7' + memory: 350M + replicas: 1 + mode: replicated + networks: + - mother \ No newline at end of file diff --git a/stacks/lvl2/redis/stack.yml b/stacks/lvl2/redis/stack.yml new file mode 100644 index 0000000..d608081 --- /dev/null +++ b/stacks/lvl2/redis/stack.yml @@ -0,0 +1,70 @@ + +version: "3.7" + +networks: + mother: + external: true + +services: + redismaster: + image: registry.vnfco.ir/library/bitnami/redis:latest + ports: + - '6969:6379' + environment: + - REDIS_REPLICATION_MODE=master + - REDIS_PASSWORD=Mf7VXwbaJQuMRd1sETVl79A7URUTyiEFwctGk3RhD+q74DmKDzc2SztYEZ2YynLZI8xuq7b9gNz2e7g2MNfU9sP8q+bEK9CqAu3y2Zu4xkVyzyYKdFMT696IB/SS1M7rvZKw7NBYT99lGCpj1edk+6hRpK/Qy2pfiVo29VKEgSXnkeJaM9nMJQ3wxN/1gLJd/uTGGf85nZWhUTEIdK58lJPSXLU55VqcaksahJHlg2oz5P5FNXWRPCnSC7obJ4OtkuHNw/P8REEL0KDyZ9khilU/XvzSgzp+v5i9hmUdKO3B8r+Y4keUZyaKz2VxCHSGAuqWSCpe+do1qQ== + volumes: + - '/volume/redis:/bitnami' + deploy: + update_config: + parallelism: 1 + delay: 10s + order: start-first + restart_policy: + condition: any + delay: 5s + max_attempts: 3 + window: 120s + resources: + limits: + cpus: '0.4' + memory: 500M + replicas: 1 + mode: replicated + placement: + constraints: + - node.labels.mongo.replica == 1 + networks: + - mother + + redisreplica: + image: registry.vnfco.ir/library/bitnami/redis:latest + ports: + - '6379' + environment: + - REDIS_REPLICATION_MODE=slave + - REDIS_MASTER_HOST=redismaster + - REDIS_MASTER_PORT_NUMBER=6379 + - REDIS_MASTER_PASSWORD=Mf7VXwbaJQuMRd1sETVl79A7URUTyiEFwctGk3RhD+q74DmKDzc2SztYEZ2YynLZI8xuq7b9gNz2e7g2MNfU9sP8q+bEK9CqAu3y2Zu4xkVyzyYKdFMT696IB/SS1M7rvZKw7NBYT99lGCpj1edk+6hRpK/Qy2pfiVo29VKEgSXnkeJaM9nMJQ3wxN/1gLJd/uTGGf85nZWhUTEIdK58lJPSXLU55VqcaksahJHlg2oz5P5FNXWRPCnSC7obJ4OtkuHNw/P8REEL0KDyZ9khilU/XvzSgzp+v5i9hmUdKO3B8r+Y4keUZyaKz2VxCHSGAuqWSCpe+do1qQ== + - 
REDIS_PASSWORD=gpGS7RezaKsWRH+qga95z6JTTxvecrpFShRR9IXqWZO2o5Kf8YAoNvGni9Zh5GGIM0Oz+e9+Cph9XDjfs3phgauiFtsEDkAf/1dxR8CixIzddcXKXgaawiqfwB8WCuM/2k8Iw7cv2wqoIdWrhdHbn96N//Su57Ri4hy5CRbUAl7VmGd9EL1sGytOJmXzQOi3lJBWvfVWbYBmgALwVQSads6g+OHmP2wpXsTsYMG1thppJVGo7i5Rh515hAuAGWTed4Ayqoe1muRR1L1Rs8pdA7IH/u4kOL1758Idd3BfTTRkr7xfjXY40dM0BkUqL5E4rOga38ThaVC99g== + deploy: + update_config: + parallelism: 1 + delay: 10s + order: start-first + restart_policy: + condition: any + delay: 5s + max_attempts: 3 + window: 120s + resources: + limits: + cpus: '0.3' + memory: 300M + replicas: 1 + mode: replicated + placement: + constraints: + - node.labels.mongo.replica == 2 + networks: + - mother \ No newline at end of file diff --git a/stacks/lvl3/stack.yml b/stacks/lvl3/stack.yml new file mode 100644 index 0000000..30a10cb --- /dev/null +++ b/stacks/lvl3/stack.yml @@ -0,0 +1,128 @@ + +version: "3.7" + +networks: + mother: + external: true + + + + +services: + + ls: + image: registry.vnfco.ir/ig/ls:test + # environment: + # - NODE_ENV=production + # - REACT_APP_SIGNAL_URL=https://ls.igarson.app + # - REACT_APP_PATH_URL=/api/v1 + # - REACT_APP_STORAGE_URL=https://storage.igarson.app + # - REACT_APP_SIGNAL_URL_PATH=/restauranthub + # - REACT_APP_LOG_LENGTH=20 + # - REACT_APP_SOCKET_SECRET=GodProtectedKeyTools + # - REACT_APP_SECRET_KEY_GAME=6JCvBmi1ADry5Qk0bLaV+5klnAqYaVtjn+HSHG4N4pOAEl480saobylNWcudDxhvgw6q0yzVmbYH+lQIS3RmEHLXRHYAHxsdXl5MBuqU/p1oCC3SWmYke1SWHkoTUOm1f8SBZ1HucOxBHv7psqFUu5lG38P/EPiozv6/0qDU+vVFFp6v9ZTigBihLll0mU6qR14krot8+y9tOnrLDkHxw3wLvWsB4wgd9+RAY97Gzzs7ix3nAXthsT+jPWdyuENiKSuKEyHKbDqciJRBGqktgYU9XkYIfZzEm4cxhscT9jqNsUg2ml+uJdDQOps2mL5LgNetWWmcP43xPIkxAwu5oK873GV3uZOdQqYE2CBYT66+/+Yp5xzoL4+qZUUiDEpfWKm+clRlAm1Keiur + deploy: + update_config: + parallelism: 1 + delay: 10s + order: start-first + restart_policy: + condition: any + delay: 5s + window: 120s + resources: + limits: + cpus: '1' + memory: 1Gi + replicas: 1 + mode: replicated + # placement: + # constraints: + # - 
node.labels.overload == 1 + volumes: + - type: bind + source: /volume/ig/ls/wwwroot + target: /publish/wwwroot + ports: + - 8010:8010 + networks: + - mother + + core: + image: registry.vnfco.ir/ig/api:test + # environment: + # - NODE_ENV=production + # - REACT_APP_SIGNAL_URL=https://ls.igarson.app + # - REACT_APP_PATH_URL=/api/v1 + # - REACT_APP_STORAGE_URL=https://storage.igarson.app + # - REACT_APP_SIGNAL_URL_PATH=/restauranthub + # - REACT_APP_LOG_LENGTH=20 + # - REACT_APP_SOCKET_SECRET=GodProtectedKeyTools + # - REACT_APP_SECRET_KEY_GAME=6JCvBmi1ADry5Qk0bLaV+5klnAqYaVtjn+HSHG4N4pOAEl480saobylNWcudDxhvgw6q0yzVmbYH+lQIS3RmEHLXRHYAHxsdXl5MBuqU/p1oCC3SWmYke1SWHkoTUOm1f8SBZ1HucOxBHv7psqFUu5lG38P/EPiozv6/0qDU+vVFFp6v9ZTigBihLll0mU6qR14krot8+y9tOnrLDkHxw3wLvWsB4wgd9+RAY97Gzzs7ix3nAXthsT+jPWdyuENiKSuKEyHKbDqciJRBGqktgYU9XkYIfZzEm4cxhscT9jqNsUg2ml+uJdDQOps2mL5LgNetWWmcP43xPIkxAwu5oK873GV3uZOdQqYE2CBYT66+/+Yp5xzoL4+qZUUiDEpfWKm+clRlAm1Keiur + deploy: + update_config: + parallelism: 1 + delay: 10s + order: start-first + restart_policy: + condition: any + delay: 5s + window: 120s + resources: + limits: + cpus: '1' + memory: 1Gi + replicas: 1 + mode: replicated + # placement: + # constraints: + # - node.labels.overload == 1 + volumes: + - type: bind + source: /volume/ig/api/storage + target: /target/storage + ports: + - 8011:8011 + networks: + - mother + + sms: + image: registry.vnfco.ir/ig/sms:test + # environment: + # - NODE_ENV=production + # - REACT_APP_SIGNAL_URL=https://ls.igarson.app + # - REACT_APP_PATH_URL=/api/v1 + # - REACT_APP_STORAGE_URL=https://storage.igarson.app + # - REACT_APP_SIGNAL_URL_PATH=/restauranthub + # - REACT_APP_LOG_LENGTH=20 + # - REACT_APP_SOCKET_SECRET=GodProtectedKeyTools + # - 
REACT_APP_SECRET_KEY_GAME=6JCvBmi1ADry5Qk0bLaV+5klnAqYaVtjn+HSHG4N4pOAEl480saobylNWcudDxhvgw6q0yzVmbYH+lQIS3RmEHLXRHYAHxsdXl5MBuqU/p1oCC3SWmYke1SWHkoTUOm1f8SBZ1HucOxBHv7psqFUu5lG38P/EPiozv6/0qDU+vVFFp6v9ZTigBihLll0mU6qR14krot8+y9tOnrLDkHxw3wLvWsB4wgd9+RAY97Gzzs7ix3nAXthsT+jPWdyuENiKSuKEyHKbDqciJRBGqktgYU9XkYIfZzEm4cxhscT9jqNsUg2ml+uJdDQOps2mL5LgNetWWmcP43xPIkxAwu5oK873GV3uZOdQqYE2CBYT66+/+Yp5xzoL4+qZUUiDEpfWKm+clRlAm1Keiur + deploy: + update_config: + parallelism: 1 + delay: 10s + order: start-first + restart_policy: + condition: any + delay: 5s + window: 120s + resources: + limits: + cpus: '0.5' + memory: 50M + replicas: 1 + mode: replicated + placement: + constraints: + - node.role == worker + networks: + - mother + + + + + + + + + diff --git a/stacks/lvl4/stack.yml b/stacks/lvl4/stack.yml new file mode 100644 index 0000000..53f7c9e --- /dev/null +++ b/stacks/lvl4/stack.yml @@ -0,0 +1,185 @@ + +version: "3.7" + +networks: + mother: + external: true + + +# ig clients stack + +services: + + menu: + image: registry.vnfco.ir/ig/pwa:test + # environment: + # - NODE_ENV=production + # - REACT_APP_SIGNAL_URL=https://ls.igarson.app + # - REACT_APP_PATH_URL=/api/v1 + # - REACT_APP_STORAGE_URL=https://storage.igarson.app + # - REACT_APP_SIGNAL_URL_PATH=/restauranthub + # - REACT_APP_LOG_LENGTH=20 + # - REACT_APP_SOCKET_SECRET=GodProtectedKeyTools + # - REACT_APP_SECRET_KEY_GAME=6JCvBmi1ADry5Qk0bLaV+5klnAqYaVtjn+HSHG4N4pOAEl480saobylNWcudDxhvgw6q0yzVmbYH+lQIS3RmEHLXRHYAHxsdXl5MBuqU/p1oCC3SWmYke1SWHkoTUOm1f8SBZ1HucOxBHv7psqFUu5lG38P/EPiozv6/0qDU+vVFFp6v9ZTigBihLll0mU6qR14krot8+y9tOnrLDkHxw3wLvWsB4wgd9+RAY97Gzzs7ix3nAXthsT+jPWdyuENiKSuKEyHKbDqciJRBGqktgYU9XkYIfZzEm4cxhscT9jqNsUg2ml+uJdDQOps2mL5LgNetWWmcP43xPIkxAwu5oK873GV3uZOdQqYE2CBYT66+/+Yp5xzoL4+qZUUiDEpfWKm+clRlAm1Keiur + deploy: + update_config: + parallelism: 1 + delay: 10s + order: start-first + restart_policy: + condition: any + delay: 5s + window: 120s + resources: + limits: + cpus: '0.5' + memory: 50M + replicas: 1 
+ mode: replicated + placement: + constraints: + - node.role == worker + ports: + - 8012:80 + networks: + - mother + + + + capitan: + image: registry.vnfco.ir/ig/garson:test + # environment: + # - NODE_ENV=production + # - REACT_APP_SIGNAL_URL=https://ls.igarson.app + # - REACT_APP_PATH_URL=/api/v1 + # - REACT_APP_STORAGE_URL=https://storage.igarson.app + # - REACT_APP_SIGNAL_URL_PATH=/restauranthub + # - REACT_APP_LOG_LENGTH=20 + # - REACT_APP_SOCKET_SECRET=GodProtectedKeyTools + # - REACT_APP_SECRET_KEY_GAME=6JCvBmi1ADry5Qk0bLaV+5klnAqYaVtjn+HSHG4N4pOAEl480saobylNWcudDxhvgw6q0yzVmbYH+lQIS3RmEHLXRHYAHxsdXl5MBuqU/p1oCC3SWmYke1SWHkoTUOm1f8SBZ1HucOxBHv7psqFUu5lG38P/EPiozv6/0qDU+vVFFp6v9ZTigBihLll0mU6qR14krot8+y9tOnrLDkHxw3wLvWsB4wgd9+RAY97Gzzs7ix3nAXthsT+jPWdyuENiKSuKEyHKbDqciJRBGqktgYU9XkYIfZzEm4cxhscT9jqNsUg2ml+uJdDQOps2mL5LgNetWWmcP43xPIkxAwu5oK873GV3uZOdQqYE2CBYT66+/+Yp5xzoL4+qZUUiDEpfWKm+clRlAm1Keiur + deploy: + update_config: + parallelism: 1 + delay: 10s + order: start-first + restart_policy: + condition: any + delay: 5s + window: 120s + resources: + limits: + cpus: '0.5' + memory: 50M + replicas: 1 + mode: replicated + placement: + constraints: + - node.role == worker + ports: + - 8013:80 + networks: + - mother + + + + website: + image: registry.vnfco.ir/ig/website:test + # environment: + # - NODE_ENV=production + # - REACT_APP_SIGNAL_URL=https://ls.igarson.app + # - REACT_APP_PATH_URL=/api/v1 + # - REACT_APP_STORAGE_URL=https://storage.igarson.app + # - REACT_APP_SIGNAL_URL_PATH=/restauranthub + # - REACT_APP_LOG_LENGTH=20 + # - REACT_APP_SOCKET_SECRET=GodProtectedKeyTools + # - 
REACT_APP_SECRET_KEY_GAME=6JCvBmi1ADry5Qk0bLaV+5klnAqYaVtjn+HSHG4N4pOAEl480saobylNWcudDxhvgw6q0yzVmbYH+lQIS3RmEHLXRHYAHxsdXl5MBuqU/p1oCC3SWmYke1SWHkoTUOm1f8SBZ1HucOxBHv7psqFUu5lG38P/EPiozv6/0qDU+vVFFp6v9ZTigBihLll0mU6qR14krot8+y9tOnrLDkHxw3wLvWsB4wgd9+RAY97Gzzs7ix3nAXthsT+jPWdyuENiKSuKEyHKbDqciJRBGqktgYU9XkYIfZzEm4cxhscT9jqNsUg2ml+uJdDQOps2mL5LgNetWWmcP43xPIkxAwu5oK873GV3uZOdQqYE2CBYT66+/+Yp5xzoL4+qZUUiDEpfWKm+clRlAm1Keiur + deploy: + update_config: + parallelism: 1 + delay: 10s + order: start-first + restart_policy: + condition: any + delay: 5s + window: 120s + resources: + limits: + cpus: '0.5' + memory: 50M + replicas: 1 + mode: replicated + placement: + constraints: + - node.role == worker + ports: + - 8014:80 + networks: + - mother + + adminpanel: + image: registry.vnfco.ir/ig/admin:test + # environment: + # - NODE_ENV=production + # - REACT_APP_SIGNAL_URL=https://ls.igarson.app + # - REACT_APP_PATH_URL=/api/v1 + # - REACT_APP_STORAGE_URL=https://storage.igarson.app + # - REACT_APP_SIGNAL_URL_PATH=/restauranthub + # - REACT_APP_LOG_LENGTH=20 + # - REACT_APP_SOCKET_SECRET=GodProtectedKeyTools + # - REACT_APP_SECRET_KEY_GAME=6JCvBmi1ADry5Qk0bLaV+5klnAqYaVtjn+HSHG4N4pOAEl480saobylNWcudDxhvgw6q0yzVmbYH+lQIS3RmEHLXRHYAHxsdXl5MBuqU/p1oCC3SWmYke1SWHkoTUOm1f8SBZ1HucOxBHv7psqFUu5lG38P/EPiozv6/0qDU+vVFFp6v9ZTigBihLll0mU6qR14krot8+y9tOnrLDkHxw3wLvWsB4wgd9+RAY97Gzzs7ix3nAXthsT+jPWdyuENiKSuKEyHKbDqciJRBGqktgYU9XkYIfZzEm4cxhscT9jqNsUg2ml+uJdDQOps2mL5LgNetWWmcP43xPIkxAwu5oK873GV3uZOdQqYE2CBYT66+/+Yp5xzoL4+qZUUiDEpfWKm+clRlAm1Keiur + deploy: + update_config: + parallelism: 1 + delay: 10s + order: start-first + restart_policy: + condition: any + delay: 5s + window: 120s + resources: + limits: + cpus: '0.5' + memory: 50M + replicas: 1 + mode: replicated + placement: + constraints: + - node.role == worker + ports: + - 8015:80 + networks: + - mother + +# shop: +# image: registry.vnfco.ir/ig/shop:test +# # environment: +# # - NODE_ENV=production +# # - 
REACT_APP_SIGNAL_URL=https://ls.igarson.app +# # - REACT_APP_PATH_URL=/api/v1 +# # - REACT_APP_STORAGE_URL=https://storage.igarson.app +# # - REACT_APP_SIGNAL_URL_PATH=/restauranthub +# # - REACT_APP_LOG_LENGTH=20 +# # - REACT_APP_SOCKET_SECRET=GodProtectedKeyTools +# # - REACT_APP_SECRET_KEY_GAME=6JCvBmi1ADry5Qk0bLaV+5klnAqYaVtjn+HSHG4N4pOAEl480saobylNWcudDxhvgw6q0yzVmbYH+lQIS3RmEHLXRHYAHxsdXl5MBuqU/p1oCC3SWmYke1SWHkoTUOm1f8SBZ1HucOxBHv7psqFUu5lG38P/EPiozv6/0qDU+vVFFp6v9ZTigBihLll0mU6qR14krot8+y9tOnrLDkHxw3wLvWsB4wgd9+RAY97Gzzs7ix3nAXthsT+jPWdyuENiKSuKEyHKbDqciJRBGqktgYU9XkYIfZzEm4cxhscT9jqNsUg2ml+uJdDQOps2mL5LgNetWWmcP43xPIkxAwu5oK873GV3uZOdQqYE2CBYT66+/+Yp5xzoL4+qZUUiDEpfWKm+clRlAm1Keiur +# deploy: +# update_config: +# parallelism: 1 +# delay: 10s +# order: start-first +# restart_policy: +# condition: any +# delay: 5s +# window: 120s +# resources: +# limits: +# cpus: '0.5' +# memory: 50M +# replicas: 1 +# mode: replicated +# placement: +# constraints: +# - node.role == worker +# ports: +# - 8014:80 +# networks: +# - mother \ No newline at end of file diff --git a/stacks/lvl6/stack.yml b/stacks/lvl6/stack.yml new file mode 100644 index 0000000..2f07080 --- /dev/null +++ b/stacks/lvl6/stack.yml @@ -0,0 +1,154 @@ + +version: "3.7" + +networks: + mother: + external: true + + +# ig clients stack + +services: + + twentyfortyeight: + image: registry.vnfco.ir/ig/game/2048:latest + # environment: + # - NODE_ENV=production + # - REACT_APP_SIGNAL_URL=https://ls.igarson.app + # - REACT_APP_PATH_URL=/api/v1 + # - REACT_APP_STORAGE_URL=https://storage.igarson.app + # - REACT_APP_SIGNAL_URL_PATH=/restauranthub + # - REACT_APP_LOG_LENGTH=20 + # - REACT_APP_SOCKET_SECRET=GodProtectedKeyTools + # - 
REACT_APP_SECRET_KEY_GAME=6JCvBmi1ADry5Qk0bLaV+5klnAqYaVtjn+HSHG4N4pOAEl480saobylNWcudDxhvgw6q0yzVmbYH+lQIS3RmEHLXRHYAHxsdXl5MBuqU/p1oCC3SWmYke1SWHkoTUOm1f8SBZ1HucOxBHv7psqFUu5lG38P/EPiozv6/0qDU+vVFFp6v9ZTigBihLll0mU6qR14krot8+y9tOnrLDkHxw3wLvWsB4wgd9+RAY97Gzzs7ix3nAXthsT+jPWdyuENiKSuKEyHKbDqciJRBGqktgYU9XkYIfZzEm4cxhscT9jqNsUg2ml+uJdDQOps2mL5LgNetWWmcP43xPIkxAwu5oK873GV3uZOdQqYE2CBYT66+/+Yp5xzoL4+qZUUiDEpfWKm+clRlAm1Keiur + deploy: + update_config: + parallelism: 1 + delay: 10s + order: start-first + restart_policy: + condition: any + delay: 5s + window: 120s + resources: + limits: + cpus: '0.5' + memory: 50M + replicas: 1 + mode: replicated + placement: + constraints: + - node.role == worker + ports: + - 6001:80 + networks: + - mother + + + + sudoku: + image: registry.vnfco.ir/ig/game/sudoku:latest + # environment: + # - NODE_ENV=production + # - REACT_APP_SIGNAL_URL=https://ls.igarson.app + # - REACT_APP_PATH_URL=/api/v1 + # - REACT_APP_STORAGE_URL=https://storage.igarson.app + # - REACT_APP_SIGNAL_URL_PATH=/restauranthub + # - REACT_APP_LOG_LENGTH=20 + # - REACT_APP_SOCKET_SECRET=GodProtectedKeyTools + # - REACT_APP_SECRET_KEY_GAME=6JCvBmi1ADry5Qk0bLaV+5klnAqYaVtjn+HSHG4N4pOAEl480saobylNWcudDxhvgw6q0yzVmbYH+lQIS3RmEHLXRHYAHxsdXl5MBuqU/p1oCC3SWmYke1SWHkoTUOm1f8SBZ1HucOxBHv7psqFUu5lG38P/EPiozv6/0qDU+vVFFp6v9ZTigBihLll0mU6qR14krot8+y9tOnrLDkHxw3wLvWsB4wgd9+RAY97Gzzs7ix3nAXthsT+jPWdyuENiKSuKEyHKbDqciJRBGqktgYU9XkYIfZzEm4cxhscT9jqNsUg2ml+uJdDQOps2mL5LgNetWWmcP43xPIkxAwu5oK873GV3uZOdQqYE2CBYT66+/+Yp5xzoL4+qZUUiDEpfWKm+clRlAm1Keiur + deploy: + update_config: + parallelism: 1 + delay: 10s + order: start-first + restart_policy: + condition: any + delay: 5s + window: 120s + resources: + limits: + cpus: '0.5' + memory: 50M + replicas: 1 + mode: replicated + placement: + constraints: + - node.role == worker + ports: + - 6002:80 + networks: + - mother + + + + mafia: + image: registry.vnfco.ir/ig/game/mafia/ui:latest + # environment: + # - NODE_ENV=production + # - 
REACT_APP_SIGNAL_URL=https://ls.igarson.app + # - REACT_APP_PATH_URL=/api/v1 + # - REACT_APP_STORAGE_URL=https://storage.igarson.app + # - REACT_APP_SIGNAL_URL_PATH=/restauranthub + # - REACT_APP_LOG_LENGTH=20 + # - REACT_APP_SOCKET_SECRET=GodProtectedKeyTools + # - REACT_APP_SECRET_KEY_GAME=6JCvBmi1ADry5Qk0bLaV+5klnAqYaVtjn+HSHG4N4pOAEl480saobylNWcudDxhvgw6q0yzVmbYH+lQIS3RmEHLXRHYAHxsdXl5MBuqU/p1oCC3SWmYke1SWHkoTUOm1f8SBZ1HucOxBHv7psqFUu5lG38P/EPiozv6/0qDU+vVFFp6v9ZTigBihLll0mU6qR14krot8+y9tOnrLDkHxw3wLvWsB4wgd9+RAY97Gzzs7ix3nAXthsT+jPWdyuENiKSuKEyHKbDqciJRBGqktgYU9XkYIfZzEm4cxhscT9jqNsUg2ml+uJdDQOps2mL5LgNetWWmcP43xPIkxAwu5oK873GV3uZOdQqYE2CBYT66+/+Yp5xzoL4+qZUUiDEpfWKm+clRlAm1Keiur + deploy: + update_config: + parallelism: 1 + delay: 10s + order: start-first + restart_policy: + condition: any + delay: 5s + window: 120s + resources: + limits: + cpus: '0.5' + memory: 50M + replicas: 1 + mode: replicated + placement: + constraints: + - node.role == worker + ports: + - 6003:80 + networks: + - mother + + mafiaserver: + image: registry.vnfco.ir/ig/game/mafia/server:latest + # environment: + # - NODE_ENV=production + # - REACT_APP_SIGNAL_URL=https://ls.igarson.app + # - REACT_APP_PATH_URL=/api/v1 + # - REACT_APP_STORAGE_URL=https://storage.igarson.app + # - REACT_APP_SIGNAL_URL_PATH=/restauranthub + # - REACT_APP_LOG_LENGTH=20 + # - REACT_APP_SOCKET_SECRET=GodProtectedKeyTools + # - REACT_APP_SECRET_KEY_GAME=6JCvBmi1ADry5Qk0bLaV+5klnAqYaVtjn+HSHG4N4pOAEl480saobylNWcudDxhvgw6q0yzVmbYH+lQIS3RmEHLXRHYAHxsdXl5MBuqU/p1oCC3SWmYke1SWHkoTUOm1f8SBZ1HucOxBHv7psqFUu5lG38P/EPiozv6/0qDU+vVFFp6v9ZTigBihLll0mU6qR14krot8+y9tOnrLDkHxw3wLvWsB4wgd9+RAY97Gzzs7ix3nAXthsT+jPWdyuENiKSuKEyHKbDqciJRBGqktgYU9XkYIfZzEm4cxhscT9jqNsUg2ml+uJdDQOps2mL5LgNetWWmcP43xPIkxAwu5oK873GV3uZOdQqYE2CBYT66+/+Yp5xzoL4+qZUUiDEpfWKm+clRlAm1Keiur + deploy: + update_config: + parallelism: 1 + delay: 10s + order: start-first + restart_policy: + condition: any + delay: 5s + window: 120s + resources: + limits: + cpus: 
'0.5' + memory: 50M + replicas: 1 + mode: replicated + placement: + constraints: + - node.role == worker + ports: + - 6023:3000 + networks: + - mother + +# quiz +# quiz api \ No newline at end of file