Add retries to notification calls #125

Closed · wants to merge 3 commits
Changes from 2 commits
server/android_notification_server.go — 31 additions, 8 deletions

@@ -166,16 +166,9 @@ func (me *AndroidNotificationServer) SendNotification(msg *PushNotification) PushResponse
 		},
 	}
 
-	ctx, cancel := context.WithTimeout(context.Background(), me.sendTimeout)
-	defer cancel()
-
 	me.logger.Infof("Sending android push notification for device=%v type=%v ackId=%v", me.AndroidPushSettings.Type, msg.Type, msg.AckID)
 
-	start := time.Now()
-	_, err := me.client.Send(ctx, fcmMsg)
-	if me.metrics != nil {
-		me.metrics.observerNotificationResponse(PushNotifyAndroid, time.Since(start).Seconds())
-	}
+	err := me.SendNotificationWithRetry(fcmMsg, 0)
Member (agnivade):

For Android, retries are already built-in to the HTTP client: https://github.com/firebase/firebase-admin-go/blob/6a28190d0d30fe0134648566c7e89f8635a51672/messaging/messaging.go#L892.

There is no need to do anything. In the future, please feel free to ask me for review or take a deeper look at the documentation. :)

Contributor Author (larkox, Aug 22, 2024):

I missed that. Thank you for pointing it out!

That said, one reason we are retrying is that FCM may return an "internal server error", which, from what I can find around the web, seems to be a fluke in their system that we just have to retry.

Looking at the internals, I see that only the serviceUnavailable status is retried, so other problems (like the context timing out, or this internal server error) won't get retried. Am I right?

If that is so, how would you approach this?

Member (agnivade):

I would trust the implementation to be correct about which status codes to retry on. Also, any network errors like timeouts are already retried.

So the only missing piece is retrying on additional status codes. I'd want to see some metrics on the distribution of errors we are getting from FCM and then take a call.

For context, overriding the retry policy from the user side is difficult: firebase/firebase-admin-go#579.

Contributor Author (larkox):

Right now I don't see FCM errors in the metrics, but @marianunez spotted a few some time ago.

For iOS we have recently found a couple regarding context timeouts, among other things, which is why we are adding the retry for both.

Member (marianunez, Aug 22, 2024):

For example, in the last 7 days there have been 54 instances of FCM push failures:

[Screenshot 2024-08-22 at 8:30 AM: graph of FCM push failure counts]

Contributor Author (larkox):

@marianunez The library doesn't expose the error codes. It has a few util functions to distinguish the errors, and that is what we are doing in the PR.

Member (agnivade):

> Not sure how we can really get that. The library exposes just an error, and doesn't expose the HTTP status code. We could use some casting to get it out, but not sure if that is what we want.

@larkox - casting is exactly what we want :)

@marianunez - thanks for the link. It's clear that we should retry on 5xx. However, from the logs, it's not clear whether the entries with "internal error" were logged after a retry on a 503, or whether they came from some other non-503 error that wasn't retried. That's what I am trying to find out here.

In any case, the library is poorly written and prevents the user from overriding the retry policy, in spite of the fact that the client clearly takes a retry policy struct as a parameter. There isn't even a way to bypass retrying so that we can copy over just the retry code.

So in the worst case, we would have to fork the repo and move some packages out of internal so that they can be imported and overridden from the client.

But before that, I wanted some metrics on the distribution of the status codes. If we see that all 34 internal errors were actually 503s, then I think we can leave this alone. But if we see other non-503 status codes, then we have a decision to make.
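
For reference, one way the status code could be surfaced is via the errorutils package in firebase-admin-go v4, which attaches the underlying *http.Response to FCM errors. A minimal sketch, assuming the proxy is on v4; this helper is illustrative, not part of the PR:

import "firebase.google.com/go/v4/errorutils"

// fcmStatusCode is a hypothetical helper: it returns the HTTP status code
// attached to an FCM error, or 0 when no response is attached (for example,
// a pure network or timeout error).
func fcmStatusCode(err error) int {
	if resp := errorutils.HTTPResponse(err); resp != nil {
		return resp.StatusCode
	}
	return 0
}

Logging this value alongside each failure would give exactly the status-code distribution being asked for here.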

Contributor Author (larkox):

@agnivade Just to understand your ask: you want a first change to surface the status code of the response, let it simmer for a while so we get new data, verify what kind of 5xx errors they are, and then decide whether this PR makes sense or not. Am I right?

Member (marianunez):

> If we see that all 34 internal errors were actually 503s, then I think we can leave this alone

@agnivade I may have missed this in the thread above, but why don't we want to retry on a 503?

Member (agnivade):

@larkox - Yep, exactly.

@marianunez - To clarify: we ourselves don't want to retry on 503, because the client already does that by default. My objective here is to figure out whether the internal failures are all 503s or not. If they are, then we know those were already retried and still failed. If some are something other than 503, then yes, we definitely need to do something ourselves.


 	if err != nil {
 		me.logger.Errorf("Failed to send FCM push sid=%v did=%v err=%v type=%v", msg.ServerID, msg.DeviceID, err, me.AndroidPushSettings.Type)

@@ -220,3 +213,33 @@ func (me *AndroidNotificationServer) SendNotification(msg *PushNotification) PushResponse
 	}
 	return NewOkPushResponse()
 }

+func isRetriableError(err error) bool {
+	// Retry on any error that is not one of the known, non-retriable FCM
+	// errors; this deliberately includes FCM's "internal server error".
+	if err == nil {
+		return false
+	}
+	return !(messaging.IsUnregistered(err) ||
+		messaging.IsInvalidArgument(err) ||
+		messaging.IsQuotaExceeded(err) ||
+		messaging.IsSenderIDMismatch(err) ||
+		messaging.IsThirdPartyAuthError(err))
+}

+func (me *AndroidNotificationServer) SendNotificationWithRetry(fcmMsg *messaging.Message, retry int) error {
+	ctx, cancel := context.WithTimeout(context.Background(), me.sendTimeout)
+	defer cancel()
+
+	start := time.Now()
+	_, err := me.client.Send(ctx, fcmMsg)
+	if me.metrics != nil {
+		me.metrics.observerNotificationResponse(PushNotifyAndroid, time.Since(start).Seconds())
Contributor:

Should we add the retry attempt to the metrics? I don't see the counter being added to the metrics.

Contributor Author (larkox):

The counter would go in after all the retries. No need to mark it as a failure if we are still retrying.

We could add a different metric for that, though. 0/5

Contributor:

Yes, I think we should at least add a metric/log so we have some idea that this is indeed happening and why; that would allow us to narrow down what could be causing future double notifications, etc.
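
A hypothetical sketch of that instrumentation; incrementNotificationRetry does not exist in the current metrics interface, and the log line is illustrative, not part of this PR:

// Count and log each retry attempt so retry storms show up in dashboards.
if me.metrics != nil {
	me.metrics.incrementNotificationRetry(PushNotifyAndroid) // assumed new metrics method
}
me.logger.Infof("Retrying android push did=%v attempt=%v", fcmMsg.Token, retry+1)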

+	}
+
+	if isRetriableError(err) {
+		me.logger.Errorf("Failed to send android push did=%v retry=%v error=%v", fcmMsg.Token, retry, err)
+		if nextIteration := retry + 1; nextIteration < MAX_RETRIES {
+			return me.SendNotificationWithRetry(fcmMsg, nextIteration)
Member:

Should we add a wait here before immediately retrying? In the server we use exponential backoff to give it some time, in case it's a network hiccup of some sort, before continuously retrying - see https://github.com/mattermost/mattermost/blob/master/server/channels/utils/backoff.go#L14-L15

Also, in the CWS we use a backoff library built on the same concept: https://github.com/mattermost/customer-web-server/blob/master/app/cloud.go#L204
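
A minimal sketch of the suggested backoff, assuming a simple doubling delay with a cap; the constants are illustrative and not part of this PR:

// Back off before the next attempt: 100ms, 200ms, 400ms, ... capped at 2s.
// A jittered backoff library like the ones linked above could be used instead.
delay := time.Duration(100*(1<<retry)) * time.Millisecond
if delay > 2*time.Second {
	delay = 2 * time.Second
}
time.Sleep(delay)
return me.SendNotificationWithRetry(fcmMsg, nextIteration)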

+		}
Contributor:

Should we add a metric or log showing that the maximum number of retries was reached?

Contributor Author (larkox):

Yes, we probably should.

+		me.logger.Errorf("Max retries reached did=%v", fcmMsg.Token)
+	}
+
+	return err
+}
server/apple_notification_server.go — 23 additions, 8 deletions

@@ -227,15 +227,8 @@ func (me *AppleNotificationServer) SendNotification(msg *PushNotification) PushResponse

 	if me.AppleClient != nil {
 		me.logger.Infof("Sending apple push notification for device=%v type=%v ackId=%v", me.ApplePushSettings.Type, msg.Type, msg.AckID)
-		start := time.Now()
-
-		ctx, cancel := context.WithTimeout(context.Background(), me.sendTimeout)
-		defer cancel()
-
-		res, err := me.AppleClient.PushWithContext(ctx, notification)
-		if me.metrics != nil {
-			me.metrics.observerNotificationResponse(PushNotifyApple, time.Since(start).Seconds())
-		}
+		res, err := me.SendNotificationWithRetry(notification, 0)
 		if err != nil {
 			me.logger.Errorf("Failed to send apple push sid=%v did=%v err=%v type=%v", msg.ServerID, msg.DeviceID, err, me.ApplePushSettings.Type)
 			if me.metrics != nil {

@@ -269,3 +262,25 @@ func (me *AppleNotificationServer) SendNotification(msg *PushNotification) PushResponse
 	}
 	return NewOkPushResponse()
 }

+func (me *AppleNotificationServer) SendNotificationWithRetry(notification *apns.Notification, retry int) (*apns.Response, error) {
+	start := time.Now()
+
+	ctx, cancel := context.WithTimeout(context.Background(), me.sendTimeout)
+	defer cancel()
+
+	res, err := me.AppleClient.PushWithContext(ctx, notification)
+	if me.metrics != nil {
+		me.metrics.observerNotificationResponse(PushNotifyApple, time.Since(start).Seconds())
Contributor:

Same comment as on Android regarding metrics and logs.

+	}
+
+	if err != nil {
+		me.logger.Errorf("Failed to send apple push did=%v retry=%v error=%v", notification.DeviceToken, retry, err)
+		if nextIteration := retry + 1; nextIteration < MAX_RETRIES {
Contributor:

Here we are not checking whether we should retry, the way we do on Android.

Contributor Author (larkox):

On iOS the library works slightly differently.

We may get a response from the library saying the notification was not sent; those would be the "known errors". Anything that comes back in the err variable is probably network-related, so that is what we are retrying here.

Contributor:

Hmmm, I see. Even when there is a mismatch with the device ID, or the device is no longer registered (for example, the app has been uninstalled)?

Contributor Author (larkox):

Yes, those scenarios are covered inside the response, and not in the error.
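
A sketch of that split, assuming the apns2-style client wrapped here; the branches are illustrative and not part of the PR:

// Transport-level failures surface in err; APNs-level rejections surface
// in the response. Only the former are worth retrying blindly.
res, err := me.AppleClient.PushWithContext(ctx, notification)
if err != nil {
	// Network or timeout failure: the request may never have reached APNs.
}
if res != nil && !res.Sent() {
	// APNs rejected the push; res.Reason explains why (for example, an
	// unregistered device token). Retrying won't help for these.
}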

+			return me.SendNotificationWithRetry(notification, nextIteration)
+		}
+		me.logger.Errorf("Max retries reached did=%v", notification.DeviceToken)
+	}
+
+	return res, err
+}
server/server.go — 1 addition

@@ -27,6 +27,7 @@ const (
 	HEADER_REAL_IP             = "X-Real-IP"
 	WAIT_FOR_SERVER_SHUTDOWN   = time.Second * 5
 	CONNECTION_TIMEOUT_SECONDS = 60
+	MAX_RETRIES                = 3
 )

 type NotificationServer interface {