Topic: API key rotation security

MCP server API key rotation security — zero-downtime rotation, session-bound vs server-bound keys, rotation triggers and detection, secret manager integration

API keys in MCP servers are high-value targets because they provide programmatic access to all the services the server connects to. Rotation failures are common: hardcoded keys require restarts, session-bound keys require reconnection, and production servers often run for weeks without a rotation. This guide covers five patterns for robust key rotation: zero-downtime rotation protocol, secret manager integration, rotation trigger detection, session-safe key exchange, and key usage auditing.

1. Zero-downtime rotation protocol

Naive key rotation — revoke old key, configure new key — causes an availability gap: any in-flight requests using the old key fail with 401 between when the old key is revoked and when the new key is configured everywhere. For an MCP server handling concurrent sessions, this gap may affect multiple active tool calls simultaneously. Zero-downtime rotation uses a two-key overlap window: the new key is added to the active set, both keys are valid during the overlap period, then the old key is removed after the overlap window expires.

/**
 * One generation of a rotating credential: the secret value together with the
 * metadata used to track it through a rotation.
 */
interface CredentialVersion {
  id: string           // version alias (e.g., 'v2026-06-06-01') — not the secret value
  value: string        // the actual credential — never logged
  // When this version became active — presumably epoch milliseconds like expiresAt (TODO confirm; nothing in this file reads it)
  activatedAt: number
  // Epoch-millisecond deadline (compared against Date.now()) after which the version stops being offered as a fallback; null = no deadline set
  expiresAt: number | null
}

/**
 * Holds the active credential and, while a rotation is in progress, the one it
 * replaced, so outbound calls keep succeeding while the new key propagates.
 *
 * `rotateTo()` promotes a new version and gives the outgoing one a deadline
 * `overlapWindowMs` in the future. Until that deadline passes, the outgoing
 * value is still handed out by `getFallback()`.
 */
class ZeroDowntimeCredentialProvider {
  private current: CredentialVersion
  private previous: CredentialVersion | null = null
  private readonly OVERLAP_WINDOW_MS: number

  /**
   * @param initial - credential version to start with
   * @param overlapWindowMs - how long a replaced credential stays usable as a
   *   fallback after `rotateTo()` (default 30 000 ms)
   */
  constructor(initial: CredentialVersion, overlapWindowMs = 30_000) {
    this.current = initial
    this.OVERLAP_WINDOW_MS = overlapWindowMs
  }

  /**
   * Step 1 of a rotation: make `newVersion` current while keeping the outgoing
   * credential available as a fallback for the overlap window.
   *
   * @param newVersion - the credential version to promote
   */
  rotateTo(newVersion: CredentialVersion): void {
    // Retire a copy instead of stamping expiresAt onto the object the caller
    // handed us — the caller may still hold and reuse that object.
    this.previous = { ...this.current, expiresAt: Date.now() + this.OVERLAP_WINDOW_MS }
    this.current = newVersion
    auditLog.info('credential_rotation_started', {
      previousVersion: this.previous.id,
      newVersion: this.current.id,
      overlapWindowMs: this.OVERLAP_WINDOW_MS
    })
  }

  /** @returns the value of the current credential, for outbound requests */
  getCurrent(): string {
    return this.current.value
  }

  /**
   * Returns the replaced credential while its overlap window is still open.
   *
   * Side effect: the first call made after the deadline logs
   * `credential_rotation_complete` and forgets the replaced credential.
   *
   * @returns the previous credential value, or null when there is none or its
   *   overlap window has expired
   */
  getFallback(): string | null {
    const previous = this.previous
    if (previous === null) return null

    // Explicit null check: a numeric deadline of 0 is still a deadline
    if (previous.expiresAt !== null && Date.now() > previous.expiresAt) {
      // Overlap window expired — old key should now be revoked
      auditLog.info('credential_rotation_complete', { retiredVersion: previous.id })
      this.previous = null
      return null
    }
    return previous.value
  }

  /**
   * Wrapper for outbound API calls: runs `fn` with the current credential and,
   * if that fails with an auth error while a fallback is available, retries
   * once with the previous credential.
   *
   * @param fn - performs the outbound call with the credential it is given
   * @returns whatever `fn` resolves to
   * @throws the original error when it is not an auth error or no fallback is
   *   available; the retry's error when the retry itself fails
   */
  async callWithRotationFallback<T>(
    fn: (credential: string) => Promise<T>
  ): Promise<T> {
    try {
      return await fn(this.getCurrent())
    } catch (error) {
      if (!isAuthError(error)) throw error

      // Resolve the fallback exactly once. Asking twice (once to test, once to
      // use) lets the overlap deadline pass in between, which would hand `fn`
      // a null credential.
      const fallback = this.getFallback()
      if (!fallback) throw error

      auditLog.warn('credential_fallback_used', { currentVersion: this.current.id })
      return await fn(fallback)
    }
  }
}

/**
 * Tells whether a thrown value is an `Error` carrying an HTTP-style `status`
 * property equal to 401 or 403.
 *
 * @param error - whatever was caught
 * @returns true only for Error instances whose `status` is 401 or 403
 */
function isAuthError(error: unknown): boolean {
  if (!(error instanceof Error)) return false
  if (!('status' in error)) return false

  const { status } = error
  return status === 401 || status === 403
}

2. Secret manager integration

Hard-coded credential values — in environment variables, in config files, in Docker environment declarations — cannot be rotated without restarting the MCP server process. In production deployments where the server may run continuously for days or weeks, this means rotation requires a scheduled downtime or a rolling restart. Secret manager integration replaces static credential values with a provider that fetches the current secret on a configurable TTL, picking up rotations automatically within the cache window.

import { SecretsManagerClient, GetSecretValueCommand } from '@aws-sdk/client-secrets-manager'

/** Settings for a credential provider that reads from a secret manager and caches the result. */
interface SecretManagerConfig {
  // Identifier of the secret; sent as `SecretId` on every fetch
  secretId: string
  // Region the secrets-manager client is constructed with
  region: string
  ttlMs: number           // cache TTL in milliseconds — should be shorter than the rotation period
  failOpen: boolean       // if true: use cached credential on SM failure; if false: throw
}

/**
 * Fetches a credential from the secret manager and caches it for
 * `config.ttlMs`, so a rotation is picked up without restarting the process.
 */
class SecretManagerCredentialProvider {
  private cache: { value: string; version: string; fetchedAt: number } | null = null
  private readonly client: SecretsManagerClient

  /** @param config - secret id, region, cache TTL and failure policy */
  constructor(private config: SecretManagerConfig) {
    this.client = new SecretsManagerClient({ region: config.region })
  }

  /**
   * Returns the credential, from cache while it is fresh and from the secret
   * manager otherwise.
   *
   * @returns the credential value and the version id it was fetched under
   *   ('unknown' when the response carries no version id)
   * @throws Error when the fetch fails (or yields no string payload) and either
   *   `failOpen` is false or nothing has been cached yet
   */
  async getCredential(): Promise<{ value: string; version: string }> {
    const now = Date.now()

    // Return cached credential if within TTL
    if (this.cache && now - this.cache.fetchedAt < this.config.ttlMs) {
      return { value: this.cache.value, version: this.cache.version }
    }

    try {
      const response = await this.client.send(
        new GetSecretValueCommand({ SecretId: this.config.secretId })
      )

      const value = response.SecretString
      if (!value) {
        // A response without a string payload (e.g. a binary-only secret) must
        // not be cached as '' — that would replace a working key with an empty
        // one. Treat it as a failed fetch so the failure policy below applies.
        throw new Error('secret manager response contained no SecretString')
      }
      const version = response.VersionId ?? 'unknown'

      this.cache = { value, version, fetchedAt: now }

      auditLog.info('credential_fetched_from_sm', {
        secretId: this.config.secretId,
        versionId: version
        // Never log value
      })

      return { value, version }
    } catch (error) {
      auditLog.error('secret_manager_fetch_failed', {
        secretId: this.config.secretId,
        error: String(error)
      })

      if (this.config.failOpen && this.cache) {
        // Fail-open: use stale cached credential if the fetch failed
        auditLog.warn('using_stale_credential', {
          secretId: this.config.secretId,
          cacheAgeMs: now - this.cache.fetchedAt
        })
        return { value: this.cache.value, version: this.cache.version }
      }

      // Fail-closed: fail-open is disabled, or there is no cached credential to fall back on
      throw new Error(`Cannot fetch credential '${this.config.secretId}': secret manager unreachable`)
    }
  }
}

3. Rotation trigger detection

Scheduled rotation (every 24 hours, every 7 days) reduces the exposure window but does not eliminate it: if a key is compromised on day one of a rotation cycle, the attacker has the full rotation period. Rotation trigger detection supplements scheduled rotation by firing an immediate rotation on signals that indicate the current key may have been compromised: a 401 response from a previously-valid key, a pattern match against known leaked credential databases, or an anomalous usage pattern.

import { RotateSecretCommand } from '@aws-sdk/client-secrets-manager'

/** Describes why an out-of-schedule rotation is being requested, and for which secret. */
interface RotationTrigger {
  // Cause of the rotation; 'auth_failure_pattern' is audit-logged with severity HIGH, every other reason with MEDIUM
  reason: 'auth_failure_pattern' | 'anomalous_usage' | 'manual' | 'scheduled' | 'secret_scanner_alert'
  // Secret to rotate; sent as `SecretId` in the rotate request
  secretId: string
  // Optional free-text justification, copied into the audit log entry
  evidence?: string
}

/**
 * Watches for signs that a credential is no longer trustworthy and asks the
 * secret manager to rotate it immediately instead of waiting for the schedule.
 */
class RotationTriggerDetector {
  private authFailureCounts = new Map<string, { count: number; windowStart: number }>()
  private readonly AUTH_FAILURE_THRESHOLD = 3    // 3 failures in window
  private readonly AUTH_FAILURE_WINDOW_MS = 60_000  // within 60 seconds

  /**
   * Call this on every 401/403 response from a downstream API.
   *
   * Counts failures per secret inside a 60-second window and requests a
   * rotation when the count reaches the threshold. The rotation is requested
   * once per window: further failures in the same window are still counted but
   * do not request another rotation while the first one is propagating.
   *
   * @param secretId - secret whose credential was rejected
   */
  recordAuthFailure(secretId: string): void {
    const now = Date.now()
    const record = this.authFailureCounts.get(secretId) ?? { count: 0, windowStart: now }

    // Window elapsed — start counting afresh
    if (now - record.windowStart > this.AUTH_FAILURE_WINDOW_MS) {
      record.count = 0
      record.windowStart = now
    }

    record.count++
    this.authFailureCounts.set(secretId, record)

    // `===` rather than `>=`: with `>=` the 4th, 5th, … failure in the window
    // would each fire another rotation, and every rotation invalidates the key
    // the previous one just issued.
    if (record.count === this.AUTH_FAILURE_THRESHOLD) {
      // Auth failure pattern detected — trigger immediate rotation
      this.triggerRotationInBackground({
        reason: 'auth_failure_pattern',
        secretId,
        evidence: `${record.count} auth failures in ${this.AUTH_FAILURE_WINDOW_MS}ms`
      })
    }
  }

  /**
   * Audit-logs the trigger and asks the secret manager to rotate the secret.
   * A failed rotation request is logged and escalated to on-call.
   *
   * @param trigger - what to rotate and why
   * @throws only if escalating to on-call itself fails
   */
  async triggerRotation(trigger: RotationTrigger): Promise<void> {
    auditLog.warn('rotation_triggered', {
      secretId: trigger.secretId,
      reason: trigger.reason,
      evidence: trigger.evidence,
      severity: trigger.reason === 'auth_failure_pattern' ? 'HIGH' : 'MEDIUM'
    })

    try {
      await secretsManagerClient.send(
        new RotateSecretCommand({ SecretId: trigger.secretId })
      )
      auditLog.info('rotation_initiated', { secretId: trigger.secretId })
    } catch (error) {
      auditLog.error('rotation_failed', { secretId: trigger.secretId, error: String(error) })
      // Alert on-call — rotation failure is a security event
      await alertOnCall(`MCP credential rotation failed for ${trigger.secretId}`)
    }
  }

  /**
   * Health check: fetches the current credential and validates it, requesting
   * a rotation when validation fails.
   *
   * @param secretId - secret being checked
   * @param provider - source of the current credential
   * @returns true when the credential validated; false when it did not or when
   *   fetching/validating threw (the error is audit-logged)
   */
  async healthCheck(secretId: string, provider: SecretManagerCredentialProvider): Promise<boolean> {
    try {
      const { value } = await provider.getCredential()
      const isValid = await validateCredential(secretId, value)
      if (!isValid) {
        this.triggerRotationInBackground({
          reason: 'auth_failure_pattern',
          secretId,
          evidence: 'health check validation failed'
        })
      }
      return isValid
    } catch (error) {
      // Still reported as unhealthy, but leave a trace of why
      auditLog.error('credential_health_check_failed', { secretId, error: String(error) })
      return false
    }
  }

  /**
   * Starts a rotation without making the caller wait for it, while making sure
   * a rejection (e.g. the on-call alert failing) is logged rather than left as
   * an unhandled promise rejection.
   */
  private triggerRotationInBackground(trigger: RotationTrigger): void {
    this.triggerRotation(trigger).catch((error: unknown) => {
      auditLog.error('rotation_trigger_unhandled_error', {
        secretId: trigger.secretId,
        error: String(error)
      })
    })
  }
}

4. Session-safe key exchange

When a credential is rotated mid-session, ongoing tool calls that were initiated with the old credential must be allowed to complete before the old credential is revoked. Abruptly revoking a credential while a tool call is in flight causes that call to fail mid-execution — which may leave external systems in an inconsistent state (a write operation that partially succeeded, a transaction left open). Session-safe key exchange tracks which credential version each active session is using and delays revocation of the old credential until all sessions using it have either completed or reconnected with the new credential.

/** Bookkeeping for one session: which credential version it is bound to and how busy it is. */
interface SessionCredentialBinding {
  // Session identifier; also the key this binding is stored under
  sessionId: string
  // Credential version the session was bound to when it was registered
  credentialVersion: string
  // Date.now() timestamp (epoch milliseconds) taken when the binding was created
  boundAt: number
  // Number of tool calls currently in flight for this session; starts at 0 and never drops below 0
  activeToolCalls: number
}

/**
 * Delays revocation of a rotated-out credential until the tool calls that are
 * still using it have finished, or until a maximum wait has elapsed —
 * whichever comes first.
 */
class SessionSafeKeyExchange {
  private sessionBindings = new Map<string, SessionCredentialBinding>()
  private pendingRevocations = new Map<
    string,
    { version: string; revokeAt: number; timer: ReturnType<typeof setTimeout> }
  >()

  /**
   * Called when a session starts — bind it to the current credential version.
   * Binding an already-known session id replaces its binding and resets its
   * in-flight counter.
   */
  bindSession(sessionId: string, credentialVersion: string): void {
    this.sessionBindings.set(sessionId, {
      sessionId,
      credentialVersion,
      boundAt: Date.now(),
      activeToolCalls: 0
    })
  }

  /**
   * Called when a session ends — forget its binding so it no longer counts
   * towards any credential version, then re-evaluate pending revocations.
   * Unknown session ids are ignored.
   */
  unbindSession(sessionId: string): void {
    if (this.sessionBindings.delete(sessionId)) {
      this.checkPendingRevocations()
    }
  }

  /** Called when a tool call starts — track in-flight count. Unknown sessions are ignored. */
  incrementActiveToolCalls(sessionId: string): void {
    const binding = this.sessionBindings.get(sessionId)
    if (binding) binding.activeToolCalls++
  }

  /** Called when a tool call completes — decrement (never below 0) and check for pending revocations. */
  decrementActiveToolCalls(sessionId: string): void {
    const binding = this.sessionBindings.get(sessionId)
    if (binding) {
      binding.activeToolCalls = Math.max(0, binding.activeToolCalls - 1)
      this.checkPendingRevocations()
    }
  }

  /**
   * Schedule revocation of a credential version. The version is revoked as
   * soon as no bound session has a tool call in flight, and at the latest
   * `maxWaitMs` from now even if calls are still running.
   *
   * @param oldVersion - credential version to revoke
   * @param maxWaitMs - upper bound on how long to wait for calls to drain (default 30 000 ms)
   */
  scheduleRevocation(oldVersion: string, maxWaitMs = 30_000): void {
    // Re-scheduling the same version replaces its deadline
    const existing = this.pendingRevocations.get(oldVersion)
    if (existing) clearTimeout(existing.timer)

    // Without a timer the deadline would only ever be looked at when some tool
    // call happens to complete — a call that hangs would keep the old
    // credential alive indefinitely.
    const timer = setTimeout(() => this.finalizeRevocation(oldVersion), maxWaitMs)

    this.pendingRevocations.set(oldVersion, {
      version: oldVersion,
      revokeAt: Date.now() + maxWaitMs,
      timer
    })

    auditLog.info('revocation_scheduled', {
      version: oldVersion,
      maxWaitMs,
      sessionsUsingVersion: this.countSessionsUsingVersion(oldVersion)
    })

    this.checkPendingRevocations()
  }

  /** Revokes every pending version that has drained or whose deadline has passed. */
  private checkPendingRevocations(): void {
    const now = Date.now()
    for (const [version, pending] of this.pendingRevocations) {
      // "No sessions bound" is covered by "no in-flight calls"
      const drained = !this.hasActiveToolCalls(version)
      if (drained || now >= pending.revokeAt) {
        this.finalizeRevocation(version)
      }
    }
  }

  /**
   * Takes a version off the pending list, cancels its deadline timer and
   * starts the actual revocation. Does nothing if the version is not pending.
   */
  private finalizeRevocation(version: string): void {
    const pending = this.pendingRevocations.get(version)
    if (!pending) return

    clearTimeout(pending.timer)
    this.pendingRevocations.delete(version)

    // Fire-and-forget, but never let a failed revocation vanish as an
    // unhandled rejection — a credential that could not be revoked is a
    // security-relevant event.
    this.revokeCredentialVersion(version).catch((error: unknown) => {
      auditLog.error('credential_version_revocation_failed', { version, error: String(error) })
    })
  }

  private countSessionsUsingVersion(version: string): number {
    return Array.from(this.sessionBindings.values())
      .filter(b => b.credentialVersion === version).length
  }

  private hasActiveToolCalls(version: string): boolean {
    return Array.from(this.sessionBindings.values())
      .some(b => b.credentialVersion === version && b.activeToolCalls > 0)
  }

  /** Performs the revocation and records it in the audit log once it has succeeded. */
  private async revokeCredentialVersion(version: string): Promise<void> {
    // Actual revocation call to secret manager or API key service
    await revokeApiKeyVersion(version)
    // Logged after the call so the audit trail never claims a revocation that failed
    auditLog.info('credential_version_revoked', { version })
  }
}

5. Key usage auditing

After a credential rotation, you may need to determine whether the old credential was used maliciously during the window when it may have been compromised — and if so, what API calls were made. This requires an audit log that associates each external API call with the credential version that was used, without logging the credential value itself. A version alias (a date-stamped identifier or UUID assigned to each credential generation) allows post-rotation forensics without exposing the credential in logs.

/** One audit-log entry describing a single outbound API call and the credential version that made it. */
interface ApiCallAuditRecord {
  // ISO-8601 time at which the record was created
  timestamp: string
  // Session on whose behalf the call was made
  sessionId: string
  // Tool that issued the call
  toolName: string
  credentialVersionId: string  // version alias — NOT the credential value; 'unknown' when the credential was never registered
  // Name of the external service that was called
  destinationService: string
  // Endpoint on that service — presumably a path or URL (TODO confirm format with callers)
  destinationEndpoint: string
  // HTTP-style status of the response; 401 and 403 additionally produce an auth-failure warning
  responseStatus: number
  // How long the call took — milliseconds, going by the name
  durationMs: number
}

/**
 * Ties every outbound API call to the credential *version* that made it, so a
 * rotated key's activity can be reconstructed later without the key itself
 * ever appearing in a log line.
 */
class KeyUsageAuditor {
  // Keyed by a short SHA-256 fingerprint of the credential; the value is its version alias.
  // Only the fingerprint is kept, so a version can be identified but the secret cannot be read back.
  private readonly credentialAliases = new Map<string, string>()

  /** Remembers which version alias a credential value belongs to. */
  registerCredential(credentialValue: string, versionAlias: string): void {
    this.credentialAliases.set(this.fingerprint(credentialValue), versionAlias)
  }

  /** Looks up the alias for a credential value; yields 'unknown' if it was never registered. */
  getVersionAlias(credentialValue: string): string {
    const alias = this.credentialAliases.get(this.fingerprint(credentialValue))
    return alias === undefined ? 'unknown' : alias
  }

  /**
   * Writes an `api_call` audit entry for one outbound call, and a second
   * `api_auth_failure` warning when the response status was 401 or 403.
   * The credential value is only used to resolve its version alias.
   */
  auditApiCall(params: {
    sessionId: string
    toolName: string
    credentialValue: string
    destinationService: string
    destinationEndpoint: string
    responseStatus: number
    durationMs: number
  }): void {
    const rejectedByAuth = [401, 403].includes(params.responseStatus)

    const record: ApiCallAuditRecord = {
      timestamp: new Date().toISOString(),
      sessionId: params.sessionId,
      toolName: params.toolName,
      credentialVersionId: this.getVersionAlias(params.credentialValue),
      destinationService: params.destinationService,
      destinationEndpoint: params.destinationEndpoint,
      responseStatus: params.responseStatus,
      durationMs: params.durationMs
    }
    auditLog.info('api_call', record)

    if (!rejectedByAuth) return

    // Surface auth rejections separately so they can be alerted on
    auditLog.warn('api_auth_failure', {
      ...record,
      severity: 'MEDIUM',
      note: 'Auth failure may indicate key rotation propagation lag or compromised credential'
    })
  }

  /**
   * Post-rotation forensics: fetches the audit records written for a given
   * credential version between two points in time.
   */
  async queryCallsByVersion(versionAlias: string, fromTime: Date, toTime: Date): Promise<ApiCallAuditRecord[]> {
    const timestampFrom = fromTime.toISOString()
    const timestampTo = toTime.toISOString()
    // Delegates to whatever store the audit log is persisted in
    return auditLogStore.query({
      credentialVersionId: versionAlias,
      timestampFrom,
      timestampTo
    })
  }

  /** First 8 hex characters of the SHA-256 digest of the credential value. */
  private fingerprint(credentialValue: string): string {
    const digest = createHash('sha256').update(credentialValue).digest('hex')
    return digest.slice(0, 8)
  }
}

SkillAudit checks for API key rotation security

SkillAudit scans for these patterns automatically. Scan your MCP server.