Fix: anthropic.APIStatusError: 529 Overloaded — Anthropic API
Updated 2026-03-06
The Error
When Anthropic’s API is at capacity, you’ll see:
anthropic.APIStatusError: Error code: 529 - {'type': 'error', 'error': {'type': 'overloaded_error', 'message': 'Overloaded'}}
The full traceback typically looks like:
Traceback (most recent call last):
File "app.py", line 15, in <module>
response = client.messages.create(
File "anthropic/client.py", line 234, in create
return self._post(...)
File "anthropic/base_client.py", line 456, in _post
raise APIStatusError(...)
anthropic.APIStatusError: Error code: 529 - {'type': 'error', 'error': {'type': 'overloaded_error', 'message': 'Overloaded'}}
What This Means
HTTP 529 is Anthropic’s custom status code indicating their servers are temporarily overloaded due to traffic spikes, new model launches, or peak usage windows. This is not your fault — it’s not a rate limit (429) or service outage (503). It means Anthropic’s infrastructure capacity is saturated at that moment, and you should retry. The good news: 529s typically resolve within 10–60 seconds, making them excellent candidates for aggressive exponential backoff.
The Fix
Fix 1: Retry with Exponential Backoff + Jitter
Use the tenacity library for clean, battle-tested retry logic:
#Version: anthropic==0.49.0, tenacity==8.2.3
from tenacity import (
retry,
stop_after_attempt,
wait_exponential,
retry_if_exception,
)
from anthropic import APIStatusError
import anthropic
def is_529_error(exc):
    """Return True when *exc* is Anthropic's 529 "Overloaded" status error."""
    if not isinstance(exc, APIStatusError):
        return False
    return exc.status_code == 529
@retry(
    retry=retry_if_exception(is_529_error),
    stop=stop_after_attempt(8),
    wait=wait_exponential(multiplier=1, min=1, max=32),
)
def call_claude_with_retry():
    """Send one message to Claude; tenacity retries 529s with capped backoff.

    Up to 8 attempts, waiting 1..32 seconds (exponential) between them.
    Any non-529 error propagates immediately via is_529_error.
    """
    prompt = {"role": "user", "content": "What is 2+2?"}
    return anthropic.Anthropic().messages.create(
        model="claude-opus-4-5",
        max_tokens=1024,
        messages=[prompt],
    )
# Demo: retries happen inside the decorator; anything that still escapes
# (a non-529 error, or 8 exhausted attempts) surfaces here.
try:
    response = call_claude_with_retry()
    print(response.content[0].text)
except APIStatusError as e:
    print(f"Failed after retries: {e}")
Without tenacity (manual backoff):
import anthropic
import time
import random
from anthropic import APIStatusError
def call_claude_with_manual_retry(max_retries=8):
    """Call Claude once, retrying 529 "Overloaded" errors with jittered backoff.

    Waits 2**attempt seconds plus up to 10% jitter, capped at 32 s.
    Non-529 errors, and a 529 on the final attempt, are re-raised.
    """
    client = anthropic.Anthropic()
    last_attempt = max_retries - 1
    for attempt in range(max_retries):
        try:
            return client.messages.create(
                model="claude-opus-4-5",
                max_tokens=1024,
                messages=[
                    {"role": "user", "content": "Summarize quantum computing."}
                ],
            )
        except APIStatusError as err:
            # Only 529s are retryable, and only while attempts remain.
            if err.status_code != 529 or attempt == last_attempt:
                raise
            delay = 2 ** attempt
            pause = min(delay + random.uniform(0, delay * 0.1), 32)
            print(f"Attempt {attempt + 1} failed with 529. Retrying in {pause:.2f}s...")
            time.sleep(pause)
    raise RuntimeError("Max retries exceeded")
# Demo: run the manual-retry helper and print the first content block.
response = call_claude_with_manual_retry()
print(response.content[0].text)
Fix 2: Detect 529 vs 429 and Handle Differently
The retry strategy depends on the error code:
import anthropic
import time
from anthropic import APIStatusError
def call_with_smart_backoff(model="claude-opus-4-5"):
    """Call Claude, backing off differently for 529 (overload) vs 429 (rate limit).

    529: short capped exponential backoff — overloads usually clear quickly.
    429: honor the server's retry-after header (default 60 s).
    Other API errors are re-raised immediately. Gives up after 8 attempts.
    """
    client = anthropic.Anthropic()
    attempt = 0
    while attempt < 8:
        try:
            return client.messages.create(
                model=model,
                max_tokens=1024,
                messages=[
                    {"role": "user", "content": "Hello, world!"}
                ],
            )
        except APIStatusError as e:
            attempt += 1
            if e.status_code == 529:
                # 529: Overloaded — retry with a short, capped exponential wait
                wait_time = min(2 ** attempt, 30)
                print(f"API overloaded (529). Retry #{attempt} in {wait_time}s...")
                time.sleep(wait_time)
            elif e.status_code == 429:
                # 429: Rate limit — wait as long as the server asks.
                # Bug fix: `attempt` was incremented a second time on this
                # path, silently halving the retry budget for rate limits.
                retry_after = int(e.response.headers.get("retry-after", 60))
                print(f"Rate limited (429). Waiting {retry_after}s per server...")
                time.sleep(retry_after)
            else:
                # Other errors: don't retry
                raise
    raise RuntimeError("Max retries exceeded")
# Demo: smart backoff distinguishes 529 (short wait) from 429 (server-directed wait).
response = call_with_smart_backoff()
print(response.content[0].text)
Fix 3: Fallback to Different Model During Overload
For non-critical workloads, gracefully degrade to a faster model:
import anthropic
from anthropic import APIStatusError
def call_with_fallback(primary_model="claude-opus-4-5"):
    """Try the primary model; on repeated 529s, degrade to a faster fallback.

    Each model gets up to 4 attempts with exponential backoff between them.
    Non-529 errors are raised immediately; RuntimeError if every model fails.
    """
    import time  # hoisted from the except clause; keeps the snippet self-contained

    fallback_model = "claude-haiku-4-5"
    client = anthropic.Anthropic()
    models_to_try = [primary_model, fallback_model]
    for model in models_to_try:
        for attempt in range(4):
            try:
                return client.messages.create(
                    model=model,
                    max_tokens=1024,
                    messages=[
                        {"role": "user", "content": "Explain machine learning."}
                    ],
                )
            except APIStatusError as e:
                if e.status_code != 529:
                    raise
                wait_time = 2 ** attempt
                print(f"{model} overloaded. Retry {attempt + 1}/4 in {wait_time}s...")
                time.sleep(wait_time)
        # Bug fix: only announce the fallback after the *primary* model is
        # exhausted — previously this printed after the fallback failed too.
        if model == primary_model:
            print(f"Primary model failed. Falling back to {fallback_model}...")
    raise RuntimeError("All models failed")
# Demo: opus first, degrading to haiku if opus stays overloaded.
response = call_with_fallback()
print(response.content[0].text)
Fix 4: Circuit Breaker for Production
Prevent thundering herd by tracking consecutive 529s:
import anthropic
import time
from anthropic import APIStatusError
from enum import Enum
class CircuitState(Enum):
    """Lifecycle states for the circuit breaker."""

    CLOSED = "closed"        # Normal operation
    OPEN = "open"            # Stop sending requests
    HALF_OPEN = "half_open"  # Test if recovered


class CircuitBreaker:
    """Stop calling the API after repeated 529s; probe again after a cooldown.

    After *failure_threshold* consecutive 529s the breaker opens and refuses
    calls for *recovery_timeout* seconds, then lets one probe request through
    (HALF_OPEN). Any success closes the breaker and resets the failure count.
    """

    def __init__(self, failure_threshold=3, recovery_timeout=30):
        self.failure_threshold = failure_threshold
        self.recovery_timeout = recovery_timeout
        self.state = CircuitState.CLOSED
        self.failure_count = 0
        self.last_failure_time = None

    def call(self, func):
        """Invoke *func* through the breaker; RuntimeError while it is open."""
        self._maybe_probe()
        try:
            result = func()
        except APIStatusError as e:
            # Only 529s count toward opening the circuit; everything
            # else (and the 529 itself) propagates to the caller.
            if e.status_code == 529:
                self._record_overload()
            raise
        self.state = CircuitState.CLOSED
        self.failure_count = 0
        return result

    def _maybe_probe(self):
        # While open: transition to HALF_OPEN once the cooldown has elapsed,
        # otherwise refuse the call outright.
        if self.state != CircuitState.OPEN:
            return
        elapsed = time.time() - self.last_failure_time
        if elapsed > self.recovery_timeout:
            self.state = CircuitState.HALF_OPEN
            print("Circuit breaker: Testing recovery...")
        else:
            raise RuntimeError(f"Circuit breaker OPEN. Wait {self.recovery_timeout - elapsed:.0f}s")

    def _record_overload(self):
        # Count consecutive 529s; trip the breaker at the threshold.
        self.failure_count += 1
        self.last_failure_time = time.time()
        if self.failure_count >= self.failure_threshold:
            self.state = CircuitState.OPEN
            print(f"Circuit breaker OPEN after {self.failure_count} failures")
# Usage: route every request through the breaker so repeated 529s trip it
# open instead of hammering an already-overloaded API.
breaker = CircuitBreaker(failure_threshold=3, recovery_timeout=30)
client = anthropic.Anthropic()
for i in range(10):
    try:
        def make_request():
            return client.messages.create(
                model="claude-opus-4-5",
                max_tokens=512,
                messages=[{"role": "user", "content": "Hi"}],
            )
        response = breaker.call(make_request)
        print(f"Request {i+1} succeeded")
    except RuntimeError as e:
        # Breaker is OPEN: request refused locally, no API traffic sent.
        print(f"Request {i+1} blocked: {e}")
    except APIStatusError as e:
        # 529 propagated through the breaker; its failure count was updated.
        print(f"Request {i+1} hit 529, circuit tracking...")
    time.sleep(1)
Why This Happens
Anthropic’s infrastructure handles billions of tokens daily. A 529 occurs when request volume exceeds provisioned capacity, typically during:
- New model launches (claude-opus-4-6 releases cause spikes)
- Peak hours (9–11am PT is consistently worst)
- Viral prompts (a popular tool gets featured and traffic surges)
- Retry cascades (other users’ failed retries compound load)
Unlike 429 (rate limit, which is per-user quota) or 503 (service-wide outage), 529 is capacity saturation — Anthropic’s auto-scaling is catching up. Check status.anthropic.com for incident reports; most 529s resolve in under a minute.
Edge Cases
Streaming responses: If you’re using client.messages.stream() and hit a 529 mid-stream, the exception fires when the stream is consumed, not when created:
# Streaming: a 529 can surface while iterating the stream, so the handler
# must wrap consumption of the stream, not just its creation.
try:
    with client.messages.stream(...) as stream:
        for text in stream.text_stream:
            print(text, end="", flush=True)
except APIStatusError as e:
    if e.status_code == 529:
        # Handle mid-stream 529
        print("\nStream interrupted by overload")
Async clients: Use asyncio with the SDK’s AsyncAnthropic client for concurrent retries without blocking:
import asyncio
from anthropic import AsyncAnthropic
async def retry_async(max_attempts=8):
    """Async Claude call with exponential backoff on 529 Overloaded errors.

    Retries up to *max_attempts* times, sleeping 2**attempt seconds between
    tries. Non-529 errors are re-raised immediately (the original version
    silently swallowed them and retried), and exhausting every attempt
    raises instead of silently returning None.
    """
    client = AsyncAnthropic()
    for attempt in range(max_attempts):
        try:
            return await client.messages.create(...)
        except APIStatusError as e:
            if e.status_code != 529:
                raise  # only 529s are worth retrying here
            await asyncio.sleep(2 ** attempt)
    raise RuntimeError("Max retries exceeded after repeated 529s")
Background jobs: If you see 3+ 529s in 5 minutes, pause your job queue for 30 seconds rather than hammering the API. This gives Anthropic capacity time to recover.
See Also
- Fix: openai.RateLimitError: You exceeded your current quota
- Fix: LangChain OutputParserException
- The Risks of Agentic AI
- Self-Host Langfuse for LLM Observability
- AI Agent Workflows Cheat Sheet
Was this article helpful?
Thanks for your feedback!