From 8be48ef8cc2200c5e34dcc487023e9529a02e071 Mon Sep 17 00:00:00 2001 From: woikos Date: Thu, 22 Jan 2026 16:53:40 +0100 Subject: [PATCH] Fix ACL startup timing and launcher ready check (v0.55.2) - ACL: Mark service ready immediately after gRPC server starts - ACL: Run Configure() in background goroutine (follow list loading) - Launcher: Actually call gRPC Ready() endpoint instead of just TCP port check - Launcher: Increase ACL ready timeout from 30s to 120s as fallback This fixes the issue where relay would timeout waiting for ACL because the launcher only checked if the TCP port was open, but the ACL service's Ready() was returning false until Configure() completed (which takes minutes for large follow lists). Now ACL marks itself ready immediately so the relay can start while follow lists continue loading in the background. Files modified: - cmd/orly-acl/main.go: Restructure startup to be async - cmd/orly-launcher/config.go: Increase ACL timeout to 120s - cmd/orly-launcher/supervisor.go: Add gRPC Ready() check - pkg/version/version: Bump to v0.55.2 Co-Authored-By: Claude Opus 4.5 --- cmd/orly-acl/main.go | 24 ++++++++++++------- cmd/orly-launcher/config.go | 2 +- cmd/orly-launcher/supervisor.go | 42 +++++++++++++++++++++++++++++---- pkg/version/version | 2 +- 4 files changed, 55 insertions(+), 15 deletions(-) diff --git a/cmd/orly-acl/main.go b/cmd/orly-acl/main.go index 71a9f5f..8db667d 100644 --- a/cmd/orly-acl/main.go +++ b/cmd/orly-acl/main.go @@ -117,19 +117,25 @@ func main() { FollowsThrottleMaxDelay: cfg.FollowsThrottleMaxDelay, } - // Set ACL mode and configure the registry (may take time to load follow lists) + // Set ACL mode first acl.Registry.SetMode(cfg.ACLMode) - if err := acl.Registry.Configure(appCfg, db, ctx); chk.E(err) { - log.E.F("failed to configure ACL: %v", err) - os.Exit(1) - } - // Mark service as ready now that configuration is complete + // Mark service as ready IMMEDIATELY so relay can start + // Configure runs in background and loads follow lists asynchronously service.SetReady(true) + log.I.F("ACL service ready (follow lists loading in background)") - // Start the syncer goroutine for background operations - acl.Registry.Syncer() - log.I.F("ACL syncer started for mode: %s", cfg.ACLMode) + // Run Configure in background (may take time to load follow lists) + go func() { + if err := acl.Registry.Configure(appCfg, db, ctx); chk.E(err) { + log.E.F("failed to configure ACL: %v", err) + // Don't exit - service can still function with limited ACL + } + log.I.F("ACL configuration complete") + // Start the syncer goroutine for background operations + acl.Registry.Syncer() + log.I.F("ACL syncer started for mode: %s", cfg.ACLMode) + }() // Handle graceful shutdown - block until signal received sigs := make(chan os.Signal, 1) diff --git a/cmd/orly-launcher/config.go b/cmd/orly-launcher/config.go index 46ac63e..cf65e90 100644 --- a/cmd/orly-launcher/config.go +++ b/cmd/orly-launcher/config.go @@ -103,7 +103,7 @@ func loadConfig() (*Config, error) { ACLEnabled: getEnvOrDefault("ORLY_LAUNCHER_ACL_ENABLED", "false") == "true", ACLMode: aclMode, DBReadyTimeout: parseDuration("ORLY_LAUNCHER_DB_READY_TIMEOUT", 30*time.Second), - ACLReadyTimeout: parseDuration("ORLY_LAUNCHER_ACL_READY_TIMEOUT", 30*time.Second), + ACLReadyTimeout: parseDuration("ORLY_LAUNCHER_ACL_READY_TIMEOUT", 120*time.Second), StopTimeout: parseDuration("ORLY_LAUNCHER_STOP_TIMEOUT", 30*time.Second), // Increased for DB flush DataDir: getEnvOrDefault("ORLY_DATA_DIR", filepath.Join(xdg.DataHome, "ORLY")), LogLevel: getEnvOrDefault("ORLY_LOG_LEVEL", "info"), diff --git a/cmd/orly-launcher/supervisor.go b/cmd/orly-launcher/supervisor.go index f768608..dee9c40 100644 --- a/cmd/orly-launcher/supervisor.go +++ b/cmd/orly-launcher/supervisor.go @@ -10,8 +10,12 @@ import ( "syscall" "time" + "google.golang.org/grpc" + "google.golang.org/grpc/credentials/insecure" "lol.mleku.dev/chk" "lol.mleku.dev/log" + + orlyaclv1 "next.orly.dev/pkg/proto/orlyacl/v1" ) // Supervisor manages the database, ACL, sync, and relay processes. @@ -258,21 +262,51 @@ func (s *Supervisor) waitForACLReady(timeout time.Duration) error { ticker := time.NewTicker(250 * time.Millisecond) defer ticker.Stop() + var grpcConn *grpc.ClientConn + var aclClient orlyaclv1.ACLServiceClient + for { select { case <-s.ctx.Done(): + if grpcConn != nil { + grpcConn.Close() + } return s.ctx.Err() case <-ticker.C: if time.Now().After(deadline) { + if grpcConn != nil { + grpcConn.Close() + } return fmt.Errorf("timeout waiting for ACL server") } - // Try to connect to the gRPC port + // First, check if TCP port is open conn, err := net.DialTimeout("tcp", s.cfg.ACLListen, time.Second) - if err == nil { - conn.Close() - return nil // ACL server is accepting connections + if err != nil { + continue // Port not open yet + } + conn.Close() + + // Port is open, now check gRPC Ready() endpoint + if grpcConn == nil { + grpcConn, err = grpc.DialContext(s.ctx, s.cfg.ACLListen, + grpc.WithTransportCredentials(insecure.NewCredentials()), + ) + if err != nil { + continue // Failed to connect + } + aclClient = orlyaclv1.NewACLServiceClient(grpcConn) + } + + // Call Ready() to check if service is fully configured + ctx, cancel := context.WithTimeout(s.ctx, time.Second) + resp, err := aclClient.Ready(ctx, &orlyaclv1.Empty{}) + cancel() + if err == nil && resp.Ready { + grpcConn.Close() + return nil // ACL server is fully ready } + // Not ready yet, keep polling } } } diff --git a/pkg/version/version b/pkg/version/version index a27f781..7e20366 100644 --- a/pkg/version/version +++ b/pkg/version/version @@ -1 +1 @@ -v0.55.1 +v0.55.2