Re-adding Infinispan workarounds to prevent deadlocks (#22058)

Relates to #9871
Closes #22057
This commit is contained in:
Alexander Schwartz 2023-07-31 10:37:28 +02:00 committed by GitHub
parent c907fde7f5
commit cf911075af
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 70 additions and 24 deletions

View file

@ -33,6 +33,7 @@ import org.keycloak.cluster.ClusterProviderFactory;
import org.keycloak.common.Profile;
import org.keycloak.common.util.Retry;
import org.keycloak.common.util.Time;
import org.keycloak.connections.infinispan.DefaultInfinispanConnectionProviderFactory;
import org.keycloak.connections.infinispan.InfinispanConnectionProvider;
import org.keycloak.connections.infinispan.TopologyInfo;
import org.keycloak.models.KeycloakSession;
@ -220,7 +221,17 @@ public class InfinispanClusterProviderFactory implements ClusterProviderFactory,
}
logger.debugf("Nodes %s removed from cluster. Removing tasks locked by this nodes", removedNodesAddresses.toString());
workCache.entrySet().removeIf(new LockEntryPredicate(removedNodesAddresses));
/*
workaround for Infinispan 12.1.7.Final to prevent a deadlock while
DefaultInfinispanConnectionProviderFactory is shutting down PersistenceManagerImpl
that acquires a writeLock and this removal that acquires a readLock.
First seen with https://issues.redhat.com/browse/ISPN-13664 and still occurs probably due to
https://issues.redhat.com/browse/ISPN-13666 in 13.0.10
Tracked in https://github.com/keycloak/keycloak/issues/9871
*/
synchronized (DefaultInfinispanConnectionProviderFactory.class) {
workCache.entrySet().removeIf(new LockEntryPredicate(removedNodesAddresses));
}
}
} catch (Throwable t) {
logger.error("caught exception in ViewChangeListener", t);

View file

@ -51,6 +51,7 @@ import org.keycloak.cluster.ClusterListener;
import org.keycloak.cluster.ClusterProvider;
import org.keycloak.common.util.ConcurrentMultivaluedHashMap;
import org.keycloak.common.util.Retry;
import org.keycloak.connections.infinispan.DefaultInfinispanConnectionProviderFactory;
import org.keycloak.executors.ExecutorsProvider;
import org.keycloak.models.KeycloakSession;
import org.infinispan.client.hotrod.exceptions.HotRodClientException;
@ -70,7 +71,7 @@ public class InfinispanNotificationsManager {
private final Cache<String, Serializable> workCache;
private final RemoteCache<String, Serializable> workRemoteCache;
private final RemoteCache workRemoteCache;
private final String myAddress;
@ -79,7 +80,7 @@ public class InfinispanNotificationsManager {
private final ExecutorService listenersExecutor;
protected InfinispanNotificationsManager(Cache<String, Serializable> workCache, RemoteCache<String, Serializable> workRemoteCache, String myAddress, String mySite, ExecutorService listenersExecutor) {
protected InfinispanNotificationsManager(Cache<String, Serializable> workCache, RemoteCache workRemoteCache, String myAddress, String mySite, ExecutorService listenersExecutor) {
this.workCache = workCache;
this.workRemoteCache = workRemoteCache;
this.myAddress = myAddress;
@ -90,7 +91,7 @@ public class InfinispanNotificationsManager {
// Create and init manager including all listeners etc
public static InfinispanNotificationsManager create(KeycloakSession session, Cache<String, Serializable> workCache, String myAddress, String mySite, Set<RemoteStore> remoteStores) {
RemoteCache<String, Serializable> workRemoteCache = null;
RemoteCache workRemoteCache = null;
if (!remoteStores.isEmpty()) {
RemoteStore remoteStore = remoteStores.iterator().next();
@ -157,11 +158,21 @@ public class InfinispanNotificationsManager {
// Add directly to remoteCache. Will notify remote listeners on all nodes in all DCs
Retry.executeWithBackoff((int iteration) -> {
try {
workRemoteCache.put(eventKey, wrappedEvent, 120, TimeUnit.SECONDS);
/*
workaround for Infinispan 12.1.7.Final to prevent a deadlock while
DefaultInfinispanConnectionProviderFactory is shutting down PersistenceManagerImpl
that acquires a writeLock and this put that acquires a readLock.
First seen with https://issues.redhat.com/browse/ISPN-13664 and still occurs probably due to
https://issues.redhat.com/browse/ISPN-13666 in 13.0.10
Tracked in https://github.com/keycloak/keycloak/issues/9871
*/
synchronized (DefaultInfinispanConnectionProviderFactory.class) {
workRemoteCache.put(eventKey, wrappedEvent, 120, TimeUnit.SECONDS);
}
} catch (HotRodClientException re) {
if (logger.isDebugEnabled()) {
logger.debugf(re, "Failed sending notification to remote cache '%s'. Key: '%s', iteration '%s'. Will try to retry the task",
workRemoteCache.getName(), eventKey, iteration);
if (logger.isDebugEnabled()) {
logger.debugf(re, "Failed sending notification to remote cache '%s'. Key: '%s', iteration '%s'. Will try to retry the task",
workRemoteCache.getName(), eventKey, iteration);
}
// Rethrow the exception. Retry will take care of handle the exception and eventually retry the operation.
@ -184,7 +195,7 @@ public class InfinispanNotificationsManager {
@CacheEntryModified
public void cacheEntryModified(CacheEntryModifiedEvent<String, Serializable> event) {
eventReceived(event.getKey(), event.getNewValue());
eventReceived(event.getKey(), event.getValue());
}
@CacheEntryRemoved
@ -198,30 +209,30 @@ public class InfinispanNotificationsManager {
@ClientListener
public class HotRodListener {
private final RemoteCache<String, Serializable> remoteCache;
private final RemoteCache<Object, Object> remoteCache;
public HotRodListener(RemoteCache<String, Serializable> remoteCache) {
public HotRodListener(RemoteCache<Object, Object> remoteCache) {
this.remoteCache = remoteCache;
}
@ClientCacheEntryCreated
public void created(ClientCacheEntryCreatedEvent<String> event) {
String key = event.getKey();
public void created(ClientCacheEntryCreatedEvent event) {
String key = event.getKey().toString();
hotrodEventReceived(key);
}
@ClientCacheEntryModified
public void updated(ClientCacheEntryModifiedEvent<String> event) {
String key = event.getKey();
public void updated(ClientCacheEntryModifiedEvent event) {
String key = event.getKey().toString();
hotrodEventReceived(key);
}
@ClientCacheEntryRemoved
public void removed(ClientCacheEntryRemovedEvent<String> event) {
String key = event.getKey();
public void removed(ClientCacheEntryRemovedEvent event) {
String key = event.getKey().toString();
taskFinished(key, true);
}
@ -230,7 +241,21 @@ public class InfinispanNotificationsManager {
// TODO: Look at CacheEventConverter stuff to possibly include value in the event and avoid additional remoteCache request
try {
listenersExecutor.submit(() -> {
eventReceived(key, remoteCache.get(key));
/*
workaround for Infinispan 12.1.7.Final to prevent a deadlock while
DefaultInfinispanConnectionProviderFactory is shutting down PersistenceManagerImpl
that acquires a writeLock and this get that acquires a readLock.
First seen with https://issues.redhat.com/browse/ISPN-13664 and still occurs probably due to
https://issues.redhat.com/browse/ISPN-13666 in 13.0.10
Tracked in https://github.com/keycloak/keycloak/issues/9871
*/
Object value;
synchronized (DefaultInfinispanConnectionProviderFactory.class) {
value = remoteCache.get(key);
}
eventReceived(key, (Serializable) value);
});
} catch (RejectedExecutionException ree) {
// server is shutting down or pool was terminated - don't throw errors

View file

@ -87,13 +87,23 @@ public class DefaultInfinispanConnectionProviderFactory implements InfinispanCon
@Override
public void close() {
if (cacheManager != null && !containerManaged) {
cacheManager.stop();
/*
workaround for Infinispan 12.1.7.Final to prevent a deadlock while
DefaultInfinispanConnectionProviderFactory is shutting down PersistenceManagerImpl
that acquires a writeLock and this removal that acquires a readLock.
First seen with https://issues.redhat.com/browse/ISPN-13664 and still occurs probably due to
https://issues.redhat.com/browse/ISPN-13666 in 13.0.10
Tracked in https://github.com/keycloak/keycloak/issues/9871
*/
synchronized (DefaultInfinispanConnectionProviderFactory.class) {
if (cacheManager != null && !containerManaged) {
cacheManager.stop();
}
if (remoteCacheProvider != null) {
remoteCacheProvider.stop();
}
cacheManager = null;
}
if (remoteCacheProvider != null) {
remoteCacheProvider.stop();
}
cacheManager = null;
}
@Override