Update Keycloak HA Guide new resource limit settings (#27079)

Closes #27078 Signed-off-by: Alexander Schwartz <aschwart@redhat.com>
2024-02-19 10:41:49 +01:00 · 2024-02-19 10:41:49 +01:00 · 5f797e3e71
commit 5f797e3e71
parent 7ce1c302fc
7 changed files with 87 additions and 69 deletions
--- a/docs/guides/high-availability/concepts-memory-and-cpu-sizing.adoc
+++ b/docs/guides/high-availability/concepts-memory-and-cpu-sizing.adoc
@ -31,15 +31,16 @@ Summary:

 Recommendations:

-* The base memory usage for an inactive Pod is 1 GB of RAM.
-
-* Leave 1 GB extra head-room for spikes of RAM.
+* The base memory usage for an inactive Pod is 1000 MB of RAM.

 * For each 100,000 active user sessions, add 500 MB per Pod in a three-node cluster (tested with up to 200,000 sessions).
 +
 This assumes that each user connects to only one client.
 Memory requirements increase with the number of client sessions per user session (not tested yet).

+* In containers, {project_name} allocates 70% of the memory limit for heap-based memory. It will also use approximately 300 MB of non-heap-based memory.
+To calculate the requested memory, use the calculation above. To calculate the memory limit, subtract the non-heap memory from the value above and divide the result by 0.7.
+
 * For each 30 user logins per second, 1 vCPU per Pod in a three-node cluster (tested with up to 300 per second).
 +
 {project_name} spends most of the CPU time hashing the password provided by the user.
@ -48,7 +49,7 @@ Memory requirements increase with the number of client sessions per user session
 +
 Most CPU time goes into creating new TLS connections, as each client runs only a single request.

-* For each 350 refresh token requests per second, 1 vCPU per Pod in a three node cluster (tested with up to 435 refresh token requests per second).
+* For each 350 refresh token requests per second, 1 vCPU per Pod in a three-node cluster (tested with up to 435 refresh token requests per second).

 * Leave 200% extra head-room for CPU usage to handle spikes in the load.
 This ensures a fast startup of the node, and sufficient capacity to handle failover tasks like, for example, re-balancing Infinispan caches, when one node fails.
@ -73,19 +74,19 @@ Limits calculated:
 +
 (Allow for three times the CPU requested to handle peaks, startups and failover tasks, and also refresh token handling which we don't have numbers on, yet)

-* Memory requested: 1.25 GB
+* Memory requested: 1250 MB
 +
-(1 GB base memory plus 250 MB RAM for 50,000 active sessions)
+(1000 MB base memory plus 250 MB RAM for 50,000 active sessions)

-* Memory limit: 2.25 GB
+* Memory limit: 1360 MB
 +
-(adding 1 GB to the memory requested)
+(1250 MB expected memory usage minus 300 MB of non-heap usage, divided by 0.7)

 == Reference architecture

 The following setup was used to retrieve the settings above to run tests of about 10 minutes for different scenarios:

-* OpenShift 4.13.x deployed on AWS via ROSA.
+* OpenShift 4.14.x deployed on AWS via ROSA.
 * Machinepool with `m5.4xlarge` instances.
 * {project_name} deployed with the Operator and 3 pods.
 * Default user password hashing with PBKDF2(SHA512) 210,000 hash iterations (which is the default).
--- a/docs/guides/high-availability/deploy-keycloak-kubernetes.adoc
+++ b/docs/guides/high-availability/deploy-keycloak-kubernetes.adoc
@ -42,8 +42,6 @@ Use a reverse proxy in front of {project_name} to filter out those URLs.
 The number of all {project_name} threads in the StatefulSet should not exceed the number of JGroup threads to avoid a JGroup thread pool exhaustion which could stall {project_name} request processing.
 You might consider limiting the number of {project_name} threads further because multiple concurrent threads will lead to throttling by Kubernetes once the requested CPU limit is reached.
 See the <@links.ha id="concepts-threads" /> {section} for details.
-<5> The JVM options set additional parameters:
-* Adjust the memory settings for the heap.

 == Verifying the deployment

--- a/docs/guides/high-availability/examples/generated/ispn-single.yaml
+++ b/docs/guides/high-availability/examples/generated/ispn-single.yaml
@ -52,6 +52,7 @@ spec:
      mode: "SYNC"
      owners: "2"
      statistics: "true"
+      remoteTimeout: 14000
      stateTransfer:
        chunkSize: 16
      
@ -72,6 +73,7 @@ spec:
      mode: "SYNC"
      owners: "2"
      statistics: "true"
+      remoteTimeout: 14000
      stateTransfer:
        chunkSize: 16
      
@ -92,6 +94,7 @@ spec:
      mode: "SYNC"
      owners: "2"
      statistics: "true"
+      remoteTimeout: 14000
      stateTransfer:
        chunkSize: 16
      
@ -112,6 +115,7 @@ spec:
      mode: "SYNC"
      owners: "2"
      statistics: "true"
+      remoteTimeout: 14000
      stateTransfer:
        chunkSize: 16
      
@ -132,6 +136,7 @@ spec:
      mode: "SYNC"
      owners: "2"
      statistics: "true"
+      remoteTimeout: 14000
      stateTransfer:
        chunkSize: 16
      
@ -152,6 +157,7 @@ spec:
      mode: "SYNC"
      owners: "2"
      statistics: "true"
+      remoteTimeout: 14000
      stateTransfer:
        chunkSize: 16
      
@ -172,6 +178,7 @@ spec:
      mode: "SYNC"
      owners: "2"
      statistics: "true"
+      remoteTimeout: 14000
      stateTransfer:
        chunkSize: 16
      
@ -192,6 +199,7 @@ spec:
      mode: "SYNC"
      owners: "2"
      statistics: "true"
+      remoteTimeout: 14000
      stateTransfer:
        chunkSize: 16
      
@ -216,11 +224,11 @@ spec:
  expose:
    type: Route
  configMapName: "cluster-config"
-  image: quay.io/infinispan/server:14.0.16.Final
+  image: quay.io/infinispan/server:14.0.24.Final
  configListener:
    enabled: false
  container:
-    extraJvmOpts: '-Dorg.infinispan.openssl=false -Dinfinispan.cluster.name=ISPN  -Djgroups.xsite.fd.interval=2000 -Djgroups.xsite.fd.timeout=10000'
+    extraJvmOpts: '-Dorg.infinispan.openssl=false -Dinfinispan.cluster.name=ISPN  -Djgroups.xsite.fd.interval=2000 -Djgroups.xsite.fd.timeout=15000'
  logging:
    categories:
      org.infinispan: info
--- a/docs/guides/high-availability/examples/generated/ispn-site-a.yaml
+++ b/docs/guides/high-availability/examples/generated/ispn-site-a.yaml
@ -138,12 +138,14 @@ spec:
      mode: "SYNC"
      owners: "2"
      statistics: "true"
+      remoteTimeout: 14000
      stateTransfer:
        chunkSize: 16
      backups:
        site-b: # <2>
          backup:
            strategy: "SYNC" # <3>
+            timeout: 13000
            stateTransfer:
              chunkSize: 16
 # end::infinispan-cache-actionTokens[]
@ -163,6 +165,7 @@ spec:
      mode: "SYNC"
      owners: "2"
      statistics: "true"
+      remoteTimeout: 14000
      stateTransfer:
        chunkSize: 16
      backups:
@ -170,6 +173,7 @@ spec:
        site-b: # <2>
          backup:
            strategy: "SYNC" # <3>
+            timeout: 13000
            stateTransfer:
              chunkSize: 16
 # end::infinispan-cache-authenticationSessions[]
@ -189,6 +193,7 @@ spec:
      mode: "SYNC"
      owners: "2"
      statistics: "true"
+      remoteTimeout: 14000
      stateTransfer:
        chunkSize: 16
      backups:
@ -196,6 +201,7 @@ spec:
        site-b: # <2>
          backup:
            strategy: "SYNC" # <3>
+            timeout: 13000
            stateTransfer:
              chunkSize: 16
 # end::infinispan-cache-clientSessions[]
@ -215,12 +221,14 @@ spec:
      mode: "SYNC"
      owners: "2"
      statistics: "true"
+      remoteTimeout: 14000
      stateTransfer:
        chunkSize: 16
      backups:
        site-b: # <2>
          backup:
            strategy: "SYNC" # <3>
+            timeout: 13000
            stateTransfer:
              chunkSize: 16
 # end::infinispan-cache-loginFailures[]
@ -240,6 +248,7 @@ spec:
      mode: "SYNC"
      owners: "2"
      statistics: "true"
+      remoteTimeout: 14000
      stateTransfer:
        chunkSize: 16
      backups:
@ -247,6 +256,7 @@ spec:
        site-b: # <2>
          backup:
            strategy: "SYNC" # <3>
+            timeout: 13000
            stateTransfer:
              chunkSize: 16
 # end::infinispan-cache-offlineClientSessions[]
@ -266,6 +276,7 @@ spec:
      mode: "SYNC"
      owners: "2"
      statistics: "true"
+      remoteTimeout: 14000
      stateTransfer:
        chunkSize: 16
      backups:
@ -273,6 +284,7 @@ spec:
        site-b: # <2>
          backup:
            strategy: "SYNC" # <3>
+            timeout: 13000
            stateTransfer:
              chunkSize: 16
 # end::infinispan-cache-offlineSessions[]
@ -292,6 +304,7 @@ spec:
      mode: "SYNC"
      owners: "2"
      statistics: "true"
+      remoteTimeout: 14000
      stateTransfer:
        chunkSize: 16
      backups:
@ -299,6 +312,7 @@ spec:
        site-b: # <2>
          backup:
            strategy: "SYNC" # <3>
+            timeout: 13000
            stateTransfer:
              chunkSize: 16
 # end::infinispan-cache-sessions[]
@ -318,12 +332,14 @@ spec:
      mode: "SYNC"
      owners: "2"
      statistics: "true"
+      remoteTimeout: 14000
      stateTransfer:
        chunkSize: 16
      backups:
        site-b: # <2>
          backup:
            strategy: "SYNC" # <3>
+            timeout: 13000
            stateTransfer:
              chunkSize: 16
 # end::infinispan-cache-work[]
@ -347,11 +363,11 @@ spec:
  expose:
    type: Route
  configMapName: "cluster-config"
-  image: quay.io/infinispan/server:14.0.16.Final
+  image: quay.io/infinispan/server:14.0.24.Final
  configListener:
    enabled: false
  container:
-    extraJvmOpts: '-Dorg.infinispan.openssl=false -Dinfinispan.cluster.name=ISPN  -Djgroups.xsite.fd.interval=2000 -Djgroups.xsite.fd.timeout=10000'
+    extraJvmOpts: '-Dorg.infinispan.openssl=false -Dinfinispan.cluster.name=ISPN  -Djgroups.xsite.fd.interval=2000 -Djgroups.xsite.fd.timeout=15000'
  logging:
    categories:
      org.infinispan: info
@ -369,6 +385,9 @@ spec:
        # end::infinispan-crossdc[]
        discovery:
          launchGossipRouter: true
+          heartbeats:
+            interval: 2000
+            timeout: 8000
        # tag::infinispan-crossdc[]
        expose:
          type: Route # <5>
--- a/docs/guides/high-availability/examples/generated/ispn-site-b.yaml
+++ b/docs/guides/high-availability/examples/generated/ispn-site-b.yaml
@ -138,12 +138,14 @@ spec:
      mode: "SYNC"
      owners: "2"
      statistics: "true"
+      remoteTimeout: 14000
      stateTransfer:
        chunkSize: 16
      backups:
        site-a: # <2>
          backup:
            strategy: "SYNC" # <3>
+            timeout: 13000
            stateTransfer:
              chunkSize: 16
 # end::infinispan-cache-actionTokens[]
@ -163,6 +165,7 @@ spec:
      mode: "SYNC"
      owners: "2"
      statistics: "true"
+      remoteTimeout: 14000
      stateTransfer:
        chunkSize: 16
      backups:
@ -170,6 +173,7 @@ spec:
        site-a: # <2>
          backup:
            strategy: "SYNC" # <3>
+            timeout: 13000
            stateTransfer:
              chunkSize: 16
 # end::infinispan-cache-authenticationSessions[]
@ -189,6 +193,7 @@ spec:
      mode: "SYNC"
      owners: "2"
      statistics: "true"
+      remoteTimeout: 14000
      stateTransfer:
        chunkSize: 16
      backups:
@ -196,6 +201,7 @@ spec:
        site-a: # <2>
          backup:
            strategy: "SYNC" # <3>
+            timeout: 13000
            stateTransfer:
              chunkSize: 16
 # end::infinispan-cache-clientSessions[]
@ -215,12 +221,14 @@ spec:
      mode: "SYNC"
      owners: "2"
      statistics: "true"
+      remoteTimeout: 14000
      stateTransfer:
        chunkSize: 16
      backups:
        site-a: # <2>
          backup:
            strategy: "SYNC" # <3>
+            timeout: 13000
            stateTransfer:
              chunkSize: 16
 # end::infinispan-cache-loginFailures[]
@ -240,6 +248,7 @@ spec:
      mode: "SYNC"
      owners: "2"
      statistics: "true"
+      remoteTimeout: 14000
      stateTransfer:
        chunkSize: 16
      backups:
@ -247,6 +256,7 @@ spec:
        site-a: # <2>
          backup:
            strategy: "SYNC" # <3>
+            timeout: 13000
            stateTransfer:
              chunkSize: 16
 # end::infinispan-cache-offlineClientSessions[]
@ -266,6 +276,7 @@ spec:
      mode: "SYNC"
      owners: "2"
      statistics: "true"
+      remoteTimeout: 14000
      stateTransfer:
        chunkSize: 16
      backups:
@ -273,6 +284,7 @@ spec:
        site-a: # <2>
          backup:
            strategy: "SYNC" # <3>
+            timeout: 13000
            stateTransfer:
              chunkSize: 16
 # end::infinispan-cache-offlineSessions[]
@ -292,6 +304,7 @@ spec:
      mode: "SYNC"
      owners: "2"
      statistics: "true"
+      remoteTimeout: 14000
      stateTransfer:
        chunkSize: 16
      backups:
@ -299,6 +312,7 @@ spec:
        site-a: # <2>
          backup:
            strategy: "SYNC" # <3>
+            timeout: 13000
            stateTransfer:
              chunkSize: 16
 # end::infinispan-cache-sessions[]
@ -318,12 +332,14 @@ spec:
      mode: "SYNC"
      owners: "2"
      statistics: "true"
+      remoteTimeout: 14000
      stateTransfer:
        chunkSize: 16
      backups:
        site-a: # <2>
          backup:
            strategy: "SYNC" # <3>
+            timeout: 13000
            stateTransfer:
              chunkSize: 16
 # end::infinispan-cache-work[]
@ -347,11 +363,11 @@ spec:
  expose:
    type: Route
  configMapName: "cluster-config"
-  image: quay.io/infinispan/server:14.0.16.Final
+  image: quay.io/infinispan/server:14.0.24.Final
  configListener:
    enabled: false
  container:
-    extraJvmOpts: '-Dorg.infinispan.openssl=false -Dinfinispan.cluster.name=ISPN  -Djgroups.xsite.fd.interval=2000 -Djgroups.xsite.fd.timeout=10000'
+    extraJvmOpts: '-Dorg.infinispan.openssl=false -Dinfinispan.cluster.name=ISPN  -Djgroups.xsite.fd.interval=2000 -Djgroups.xsite.fd.timeout=15000'
  logging:
    categories:
      org.infinispan: info
@ -369,6 +385,9 @@ spec:
        # end::infinispan-crossdc[]
        discovery:
          launchGossipRouter: true
+          heartbeats:
+            interval: 2000
+            timeout: 8000
        # tag::infinispan-crossdc[]
        expose:
          type: Route # <5>
--- a/docs/guides/high-availability/examples/generated/keycloak-ispn.yaml
+++ b/docs/guides/high-availability/examples/generated/keycloak-ispn.yaml
--- a/docs/guides/high-availability/examples/generated/keycloak.yaml
+++ b/docs/guides/high-availability/examples/generated/keycloak.yaml