
As LLM applications mature, simple single-shot prompting no longer suffices for complex reasoning tasks. Dynamic prompt chaining enables AI systems to break down problems, maintain context, and achieve higher accuracy through structured multi-step workflows.
What is Prompt Chaining?
Prompt chaining decomposes complex tasks into a sequence of focused prompts, where each prompt builds on previous outputs. This approach reduces errors, improves interpretability, and enables sophisticated reasoning.
Benefits Over Single Prompts
| Aspect | Single Prompt | Chained Prompts |
|---|---|---|
| Accuracy | 60-75% | 85-95% |
| Debugging | Hard | Easy (inspect each step) |
| Context control | Limited | Precise per step |
| Error recovery | None | Retry failed steps |
| Cost | Lower | Higher (more calls) |

(The accuracy figures are illustrative; actual gains depend heavily on the task and how the chain is decomposed.)
Basic Chaining Pattern
```python
from anthropic import Anthropic

client = Anthropic()

def chain_prompts(user_query: str) -> str:
    # Step 1: Decompose the query into sub-questions
    decompose_prompt = f"""Break this complex question into 2-3 simpler sub-questions:
Question: {user_query}
Sub-questions (numbered list):"""
    response1 = client.messages.create(
        model="claude-3-5-sonnet-20241022",
        max_tokens=500,
        messages=[{"role": "user", "content": decompose_prompt}]
    )
    # parse_list and format_qa_pairs are small helpers (sketched below)
    sub_questions = parse_list(response1.content[0].text)

    # Step 2: Answer each sub-question independently
    sub_answers = []
    for sq in sub_questions:
        answer_prompt = f"Answer this specific question concisely:\n\n{sq}"
        response = client.messages.create(
            model="claude-3-5-sonnet-20241022",
            max_tokens=300,
            messages=[{"role": "user", "content": answer_prompt}]
        )
        sub_answers.append(response.content[0].text)

    # Step 3: Synthesize the final answer from the sub-answers
    synthesis_prompt = f"""Original question: {user_query}
Sub-questions and answers:
{format_qa_pairs(sub_questions, sub_answers)}
Provide a comprehensive final answer to the original question:"""
    final_response = client.messages.create(
        model="claude-3-5-sonnet-20241022",
        max_tokens=800,
        messages=[{"role": "user", "content": synthesis_prompt}]
    )
    return final_response.content[0].text
```
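The example assumes two helpers, `parse_list` and `format_qa_pairs`, that are not shown above. A minimal sketch, assuming the decomposition step really does return a plain numbered list:

```python
import re

def parse_list(text: str) -> list[str]:
    # Pull items out of a numbered list such as "1. ..." or "2) ..."
    items = re.findall(r"^\s*\d+[.)]\s*(.+)$", text, flags=re.MULTILINE)
    return [item.strip() for item in items]

def format_qa_pairs(questions: list[str], answers: list[str]) -> str:
    # Interleave each sub-question with its answer for the synthesis prompt
    return "\n".join(
        f"Q{i}: {q}\nA{i}: {a}" for i, (q, a) in enumerate(zip(questions, answers), start=1)
    )
```

With those in place, `chain_prompts("Why do some vaccines require boosters while others don't?")` runs the full decompose-answer-synthesize loop in three or more API calls.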
Chain-of-Thought (CoT) Prompting
Encourage step-by-step reasoning by explicitly requesting it:
```python
def cot_prompt(problem: str) -> str:
    prompt = f"""Solve this problem step by step. Show your reasoning.
Problem: {problem}
Let's think through this step by step:
1."""
    response = client.messages.create(
        model="claude-3-5-sonnet-20241022",
        max_tokens=1500,
        messages=[{"role": "user", "content": prompt}]
    )
    return response.content[0].text

# Example
result = cot_prompt("If a train travels 120 km in 1.5 hours, then slows down by 20%, how long will it take to travel another 100 km?")
```
Self-Consistency: Multiple Paths
Generate multiple reasoning paths and select the most consistent answer:
```python
from collections import Counter

def self_consistency(question: str, n_samples: int = 5) -> str:
    answers = []
    for _ in range(n_samples):
        prompt = f"""Solve this problem. Show your reasoning and final answer clearly.
Problem: {question}
Solution:"""
        response = client.messages.create(
            model="claude-3-5-sonnet-20241022",
            max_tokens=1000,
            temperature=0.7,  # sampling diversity across reasoning paths
            messages=[{"role": "user", "content": prompt}]
        )
        # extract_final_answer pulls out the model's conclusion (sketched below)
        answers.append(extract_final_answer(response.content[0].text))

    # Majority vote: return the most common final answer
    return Counter(answers).most_common(1)[0][0]
```
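`extract_final_answer` is left undefined above; a minimal sketch that looks for an explicit "final answer" line and otherwise falls back to the last non-empty line:

```python
import re

def extract_final_answer(text: str) -> str:
    # Prefer an explicit "Final answer: ..." line if the model produced one
    match = re.search(r"final answer[:\s]+(.+)", text, flags=re.IGNORECASE)
    if match:
        return match.group(1).strip()
    # Otherwise fall back to the last non-empty line of the solution
    lines = [line.strip() for line in text.splitlines() if line.strip()]
    return lines[-1] if lines else ""
```

In practice the extracted answers usually need normalization (casing, punctuation, number formats) before the majority vote is meaningful.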
Tree of Thoughts (ToT)
Explore multiple reasoning branches and select the best path:
```python
class ThoughtNode:
    def __init__(self, thought: str, parent=None):
        self.thought = thought
        self.parent = parent
        self.children = []
        self.score = 0

def tree_of_thoughts(problem: str, depth: int = 3, branching: int = 3):
    root = ThoughtNode("Problem: " + problem)

    def expand(node: ThoughtNode, current_depth: int):
        if current_depth >= depth:
            return
        # Generate multiple candidate next thoughts from the current path
        prompt = f"""Given this reasoning so far:
{get_path_to_root(node)}
Generate {branching} different next steps or ideas to solve this problem.
Format: numbered list."""
        response = client.messages.create(
            model="claude-3-5-sonnet-20241022",
            max_tokens=500,
            messages=[{"role": "user", "content": prompt}]
        )
        thoughts = parse_list(response.content[0].text)

        # Create child nodes and score each candidate
        for thought in thoughts[:branching]:
            child = ThoughtNode(thought, parent=node)
            child.score = evaluate_thought(child)
            node.children.append(child)

        # Greedily expand only the highest-scoring child
        best_child = max(node.children, key=lambda n: n.score)
        expand(best_child, current_depth + 1)

    expand(root, 0)
    # get_path_to_root and get_best_path are sketched below
    return get_best_path(root)

def evaluate_thought(node: ThoughtNode) -> int:
    """Score a thought's promise (0-10) with a separate evaluation call."""
    prompt = f"""Rate how promising this reasoning step is (0-10):
{get_path_to_root(node)}
Score (0-10):"""
    response = client.messages.create(
        model="claude-3-5-sonnet-20241022",
        max_tokens=10,
        messages=[{"role": "user", "content": prompt}]
    )
    try:
        return int(response.content[0].text.strip())
    except ValueError:
        # The model sometimes adds extra words; treat an unparseable score as 0
        return 0
```
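`get_path_to_root` and `get_best_path` are assumed helpers; a minimal sketch given the node structure above:

```python
def get_path_to_root(node: ThoughtNode) -> str:
    # Collect thoughts from the root down to this node, one per line
    thoughts = []
    while node is not None:
        thoughts.append(node.thought)
        node = node.parent
    return "\n".join(reversed(thoughts))

def get_best_path(root: ThoughtNode) -> str:
    # Follow the highest-scoring child at each level down to a leaf
    node = root
    path = [node.thought]
    while node.children:
        node = max(node.children, key=lambda n: n.score)
        path.append(node.thought)
    return "\n".join(path)
```

Note that because expand() only recurses into the best child, this version is a greedy search of width 1; a fuller Tree of Thoughts keeps several frontier nodes and can backtrack when a branch scores poorly.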
ReAct: Reasoning + Acting
Interleave reasoning with tool use for grounded problem-solving:
```python
def react_loop(question: str, max_steps: int = 5) -> str:
    history = []
    for step in range(max_steps):
        # Ask for the next thought and action given the history so far
        prompt = f"""Question: {question}
Previous steps:
{format_history(history)}
What should we do next? Format:
Thought: [your reasoning]
Action: [tool_name: parameters]"""
        response = client.messages.create(
            model="claude-3-5-sonnet-20241022",
            max_tokens=300,
            messages=[{"role": "user", "content": prompt}]
        )
        text = response.content[0].text
        # format_history and parse_thought_action are sketched below
        thought, action = parse_thought_action(text)

        # Execute the chosen tool and record the observation
        observation = execute_tool(action)
        history.append({
            "thought": thought,
            "action": action,
            "observation": observation
        })

        # Stop once the model signals it is done
        if "FINISH" in action:
            return extract_answer(observation)
    return "Max steps reached"

# Example tools
def execute_tool(action: str):
    if action.startswith("search:"):
        query = action.split(":", 1)[1]
        return web_search(query)
    elif action.startswith("calculate:"):
        expr = action.split(":", 1)[1]
        # NOTE: eval on model output is unsafe; use a restricted expression parser in production
        return eval(expr)
    elif action.startswith("FINISH:"):
        return action.split(":", 1)[1]
    else:
        return "Unknown tool"
```
Prompt Optimization with DSPy
Automatically optimize prompts and few-shot examples from labeled data:
```python
import dspy

# Configure the LM (the setup call varies across DSPy versions; recent releases
# configure it via dspy.LM("anthropic/claude-3-5-sonnet-20241022"))
lm = dspy.Claude(model="claude-3-5-sonnet-20241022")
dspy.settings.configure(lm=lm)

# Define a signature: typed input/output fields for the task
class QuestionAnswering(dspy.Signature):
    """Answer questions with reasoning"""
    question = dspy.InputField()
    reasoning = dspy.OutputField(desc="step-by-step reasoning")
    answer = dspy.OutputField(desc="final answer")

# Create a module that prompts with chain-of-thought
qa = dspy.ChainOfThought(QuestionAnswering)

# Example data
examples = [
    dspy.Example(
        question="What is 15% of 240?",
        answer="36"
    ).with_inputs("question"),
    # ... more examples
]

# Optimize prompts automatically by bootstrapping few-shot demonstrations
from dspy.teleprompt import BootstrapFewShot

optimizer = BootstrapFewShot(metric=exact_match)  # exact_match metric is sketched below
optimized_qa = optimizer.compile(qa, trainset=examples)

# Use the optimized version
result = optimized_qa(question="What is 23% of 450?")
print(result.answer)
```
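The optimizer needs a metric. DSPy metrics receive a gold example, a prediction, and an optional trace; a minimal exact-match metric might look like this:

```python
def exact_match(example, pred, trace=None) -> bool:
    # Compare the predicted answer to the gold answer, ignoring case and whitespace
    return example.answer.strip().lower() == pred.answer.strip().lower()
```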
Context Window Management
For long chains, manage context strategically:
```python
class ContextManager:
    def __init__(self, max_tokens: int = 100000):
        self.max_tokens = max_tokens
        self.history = []

    def add_exchange(self, prompt: str, response: str):
        self.history.append({"prompt": prompt, "response": response})
        self._prune_if_needed()

    def _prune_if_needed(self):
        total = sum(estimate_tokens(h["prompt"]) + estimate_tokens(h["response"])
                    for h in self.history)
        while total > self.max_tokens and len(self.history) > 1:
            # Keep the first exchange (instructions); drop the oldest exchange after it
            self.history.pop(1)
            total = sum(estimate_tokens(h["prompt"]) + estimate_tokens(h["response"])
                        for h in self.history)

    def get_context(self) -> str:
        return "\n\n".join(
            f"User: {h['prompt']}\nAssistant: {h['response']}"
            for h in self.history
        )
```
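`estimate_tokens` is assumed above; a rough character-based heuristic (roughly four characters per token for English text) is usually good enough for pruning decisions:

```python
def estimate_tokens(text: str) -> int:
    # Crude approximation: ~4 characters per token for English prose
    return max(1, len(text) // 4)
```

If exact counts matter, the Anthropic API exposes a token-counting endpoint, but an estimate keeps the manager dependency-free.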
Error Recovery
Build robustness into chains:
```python
import time

def robust_chain(query: str, max_retries: int = 3) -> str:
    for attempt in range(max_retries):
        try:
            result = chain_prompts(query)
            # Validate the result before returning it
            if is_valid(result):
                return result
            else:
                # Refine the prompt and retry
                query = f"{query}\n\nNote: Previous attempt was invalid. Be more specific."
        except Exception:
            if attempt == max_retries - 1:
                raise
            time.sleep(2 ** attempt)  # exponential backoff
    return "Failed after retries"

def is_valid(response: str) -> bool:
    # Check for failure markers
    if "I don't have enough information" in response:
        return False
    if len(response) < 50:  # suspiciously short
        return False
    return True
```
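String heuristics like these are brittle. When a step is supposed to return structured output, validating it against a schema is more reliable; a minimal sketch using Pydantic (an assumption, any schema validator would do, and the step would need to be prompted to emit JSON):

```python
from pydantic import BaseModel, ValidationError

class StepOutput(BaseModel):
    answer: str
    confidence: float  # expected to be between 0 and 1

def is_valid_structured(raw_json: str) -> bool:
    # Accept the step only if it parses into the expected schema with a sane confidence
    try:
        parsed = StepOutput.model_validate_json(raw_json)
        return 0.0 <= parsed.confidence <= 1.0
    except ValidationError:
        return False
```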
Monitoring Chain Performance
Trace each step so latency, cost, and failures can be attributed to a specific stage of the chain:
```python
import time
from langsmith import Client

# NOTE: illustrative tracing sketch. Check the LangSmith SDK docs for the exact
# create_run/update_run parameters; the SDK may also expect a run_type and
# caller-supplied run ids. decompose/answer/synthesize wrap the three steps
# from chain_prompts above.
ls_client = Client()

def traced_chain(query: str):
    run_id = ls_client.create_run(name="chained_qa", inputs={"query": query})
    try:
        start = time.time()

        # Step 1: decompose
        step1_start = time.time()
        sub_questions = decompose(query)
        ls_client.create_run(
            name="decompose",
            parent_run_id=run_id,
            inputs={"query": query},
            outputs={"sub_questions": sub_questions},
            execution_time=time.time() - step1_start
        )

        # Step 2: answer each sub-question
        step2_start = time.time()
        answers = [answer(sq) for sq in sub_questions]
        ls_client.create_run(
            name="answer_sub_questions",
            parent_run_id=run_id,
            inputs={"sub_questions": sub_questions},
            outputs={"answers": answers},
            execution_time=time.time() - step2_start
        )

        # Step 3: synthesize
        step3_start = time.time()
        final = synthesize(query, answers)
        ls_client.create_run(
            name="synthesize",
            parent_run_id=run_id,
            inputs={"query": query, "answers": answers},
            outputs={"final": final},
            execution_time=time.time() - step3_start
        )

        ls_client.update_run(
            run_id,
            outputs={"answer": final},
            execution_time=time.time() - start,
            status="success"
        )
        return final
    except Exception as e:
        ls_client.update_run(run_id, status="error", error=str(e))
        raise
```
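If the manual run bookkeeping above feels heavy, LangSmith's @traceable decorator records runs (and their nesting) automatically when LANGSMITH_API_KEY is set; a minimal sketch reusing the same step functions:

```python
from langsmith import traceable

# decompose, answer, and synthesize are the same step helpers used above;
# decorating them with @traceable as well would produce nested child runs.
@traceable(run_type="chain", name="chained_qa")
def traced_chain_v2(query: str) -> str:
    sub_questions = decompose(query)
    answers = [answer(sq) for sq in sub_questions]
    return synthesize(query, answers)
```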
Cost Optimization
Chaining increases API calls. Optimize by:
- Using smaller models for simple steps (Haiku for routing, as in the example below)
- Caching intermediate results (sketched at the end of this section)
- Parallelizing independent steps (sketched at the end of this section)
- Short-circuiting when confidence is high
```python
def cost_optimized_chain(query: str) -> str:
    # classify_complexity and single_prompt are assumed helpers; chain_prompts
    # would need a model parameter added for this routing to work as written
    is_complex = classify_complexity(query, model="claude-3-haiku-20240307")

    if not is_complex:
        # Simple query: single shot with the cheap model
        return single_prompt(query, model="claude-3-haiku-20240307")
    else:
        # Complex query: full chain with the more capable model
        return chain_prompts(query, model="claude-3-5-sonnet-20241022")
```
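Caching and parallelization (the second and third bullets) might look like the sketch below, which memoizes sub-question answers and issues independent calls concurrently with the SDK's async client; the function names are illustrative:

```python
import asyncio
from anthropic import AsyncAnthropic

async_client = AsyncAnthropic()
_answer_cache: dict[str, str] = {}  # sub-question -> cached answer

async def answer_sub_question(sub_question: str) -> str:
    # Caching: repeated sub-questions cost nothing after the first call
    if sub_question in _answer_cache:
        return _answer_cache[sub_question]
    response = await async_client.messages.create(
        model="claude-3-5-sonnet-20241022",
        max_tokens=300,
        messages=[{"role": "user", "content": f"Answer this specific question concisely:\n\n{sub_question}"}]
    )
    answer = response.content[0].text
    _answer_cache[sub_question] = answer
    return answer

async def answer_all(sub_questions: list[str]) -> list[str]:
    # Parallelization: sub-questions are independent, so fan the calls out concurrently
    return await asyncio.gather(*(answer_sub_question(sq) for sq in sub_questions))
```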