PROMPT AND CONTEXT ENGINEERING

Advanced Prompt Engineering for Complex Tasks

Advanced prompt engineering techniques for complex AI reasoning tasks

As LLM applications mature, simple single-shot prompting no longer suffices for complex reasoning tasks. Dynamic prompt chaining enables AI systems to break down problems, maintain context, and achieve higher accuracy through structured multi-step workflows.

What is Prompt Chaining?

Prompt chaining decomposes complex tasks into a sequence of focused prompts, where each prompt builds on previous outputs. This approach reduces errors, improves interpretability, and enables sophisticated reasoning.

Benefits Over Single Prompts

Aspect            Single Prompt   Chained Prompts
Accuracy          60-75%          85-95%
Debugging         Hard            Easy (inspect each step)
Context control   Limited         Precise per step
Error recovery    None            Retry failed steps
Cost              Lower           Higher (more calls)

Basic Chaining Pattern

from anthropic import Anthropic

client = Anthropic()

def chain_prompts(user_query: str):
    # Step 1: Decompose query
    decompose_prompt = f"""Break this complex question into 2-3 simpler sub-questions:

Question: {user_query}

Sub-questions (numbered list):"""

    response1 = client.messages.create(
        model="claude-3-5-sonnet-20241022",
        max_tokens=500,
        messages=[{"role": "user", "content": decompose_prompt}]
    )
    sub_questions = parse_list(response1.content[0].text)

    # Step 2: Answer each sub-question
    sub_answers = []
    for sq in sub_questions:
        answer_prompt = f"Answer this specific question concisely:\n\n{sq}"
        response = client.messages.create(
            model="claude-3-5-sonnet-20241022",
            max_tokens=300,
            messages=[{"role": "user", "content": answer_prompt}]
        )
        sub_answers.append(response.content[0].text)

    # Step 3: Synthesize final answer
    synthesis_prompt = f"""Original question: {user_query}

Sub-questions and answers:
{format_qa_pairs(sub_questions, sub_answers)}

Provide a comprehensive final answer to the original question:"""

    final_response = client.messages.create(
        model="claude-3-5-sonnet-20241022",
        max_tokens=800,
        messages=[{"role": "user", "content": synthesis_prompt}]
    )

    return final_response.content[0].text
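
The helpers above (parse_list and format_qa_pairs) are left undefined in the snippet; a minimal sketch, assuming the model returns a numbered or bulleted list:

import re

def parse_list(text: str) -> list[str]:
    """Pull items out of a numbered or bulleted list in the model's reply."""
    items = []
    for line in text.splitlines():
        match = re.match(r"^\s*(?:\d+[.)]|[-*•])\s+(.*\S)", line)
        if match:
            items.append(match.group(1))
    return items

def format_qa_pairs(questions: list[str], answers: list[str]) -> str:
    """Interleave sub-questions and their answers for the synthesis prompt."""
    return "\n\n".join(
        f"Q{i + 1}: {q}\nA{i + 1}: {a}"
        for i, (q, a) in enumerate(zip(questions, answers))
    )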

Chain-of-Thought (CoT) Prompting

Encourage step-by-step reasoning by explicitly requesting it:

def cot_prompt(problem: str):
    prompt = f"""Solve this problem step by step. Show your reasoning.

Problem: {problem}

Let's think through this step by step:
1."""

    response = client.messages.create(
        model="claude-3-5-sonnet-20241022",
        max_tokens=1500,
        messages=[{"role": "user", "content": prompt}]
    )

    return response.content[0].text

# Example
result = cot_prompt("If a train travels 120 km in 1.5 hours, then slows down by 20%, how long will it take to travel another 100 km?")

Self-Consistency: Multiple Paths

Generate multiple reasoning paths and select the most consistent answer:

def self_consistency(question: str, n_samples: int = 5):
    answers = []

    for i in range(n_samples):
        prompt = f"""Solve this problem. Show your reasoning and final answer clearly.

Problem: {question}

Solution:"""

        response = client.messages.create(
            model="claude-3-5-sonnet-20241022",
            max_tokens=1000,
            temperature=0.7,  # sampling diversity
            messages=[{"role": "user", "content": prompt}]
        )

        answers.append(extract_final_answer(response.content[0].text))

    # Return most common answer
    from collections import Counter
    return Counter(answers).most_common(1)[0][0]
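
extract_final_answer is also assumed; one simple (admittedly brittle) version looks for an explicit "Answer:" line and otherwise falls back to the last non-empty line. Asking the model to end with a fixed "Final answer:" line makes this far more reliable:

def extract_final_answer(text: str) -> str:
    """Pull the final answer out of a free-form solution."""
    for line in reversed(text.splitlines()):
        stripped = line.strip()
        if stripped.lower().startswith(("final answer:", "answer:")):
            return stripped.split(":", 1)[1].strip()
    # Fallback: use the last non-empty line
    non_empty = [line.strip() for line in text.splitlines() if line.strip()]
    return non_empty[-1] if non_empty else ""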

Tree of Thoughts (ToT)

Explore multiple reasoning branches and select the best path:

class ThoughtNode:
    def __init__(self, thought: str, parent=None):
        self.thought = thought
        self.parent = parent
        self.children = []
        self.score = 0

def tree_of_thoughts(problem: str, depth: int = 3, branching: int = 3):
    root = ThoughtNode("Problem: " + problem)

    def expand(node: ThoughtNode, current_depth: int):
        if current_depth >= depth:
            return

        # Generate multiple next thoughts
        prompt = f"""Given this reasoning so far:
{get_path_to_root(node)}

Generate {branching} different next steps or ideas to solve this problem.
Format: numbered list."""

        response = client.messages.create(
            model="claude-3-5-sonnet-20241022",
            max_tokens=500,
            messages=[{"role": "user", "content": prompt}]
        )

        thoughts = parse_list(response.content[0].text)

        # Create child nodes and evaluate
        for thought in thoughts[:branching]:
            child = ThoughtNode(thought, parent=node)
            child.score = evaluate_thought(child)
            node.children.append(child)

        # Expand the best-scoring child (greedy descent down one branch)
        if not node.children:
            return
        best_child = max(node.children, key=lambda n: n.score)
        expand(best_child, current_depth + 1)

    expand(root, 0)

    # Get best path
    return get_best_path(root)

def evaluate_thought(node: ThoughtNode):
    """Score a thought's promise (0-10)"""
    prompt = f"""Rate how promising this reasoning step is (0-10):

{get_path_to_root(node)}

Score (0-10):"""

    response = client.messages.create(
        model="claude-3-5-sonnet-20241022",
        max_tokens=10,
        messages=[{"role": "user", "content": prompt}]
    )

    # Parse the numeric score; fall back to 0 if the reply isn't a clean integer
    try:
        return int(response.content[0].text.strip())
    except ValueError:
        return 0
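
get_path_to_root and get_best_path are assumed helpers; straightforward versions just walk the parent and child links of ThoughtNode:

def get_path_to_root(node: ThoughtNode) -> str:
    """Concatenate the thoughts from the root down to this node."""
    path = []
    while node is not None:
        path.append(node.thought)
        node = node.parent
    return "\n".join(reversed(path))

def get_best_path(root: ThoughtNode) -> str:
    """Follow the highest-scoring child at each level and return that chain."""
    path = [root.thought]
    node = root
    while node.children:
        node = max(node.children, key=lambda n: n.score)
        path.append(node.thought)
    return "\n".join(path)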

ReAct: Reasoning + Acting

Interleave reasoning with tool use for grounded problem-solving:

def react_loop(question: str, max_steps: int = 5):
    history = []

    for step in range(max_steps):
        # Generate thought and action
        prompt = f"""Question: {question}

Previous steps:
{format_history(history)}

What should we do next? Format:
Thought: [your reasoning]
Action: [tool_name: parameters]"""

        response = client.messages.create(
            model="claude-3-5-sonnet-20241022",
            max_tokens=300,
            messages=[{"role": "user", "content": prompt}]
        )

        text = response.content[0].text
        thought, action = parse_thought_action(text)

        # Execute action
        observation = execute_tool(action)

        history.append({
            "thought": thought,
            "action": action,
            "observation": observation
        })

        # Check if done
        if "FINISH" in action:
            return extract_answer(observation)

    return "Max steps reached"

# Example tools
def execute_tool(action: str):
    if action.startswith("search:"):
        query = action.split(":", 1)[1]
        return web_search(query)
    elif action.startswith("calculate:"):
        expr = action.split(":", 1)[1]
        # WARNING: eval() on model-generated input is unsafe; use a restricted math parser in production
        return eval(expr)
    elif action.startswith("FINISH:"):
        return action.split(":", 1)[1]
    else:
        return "Unknown tool"

Prompt Optimization with DSPy

Automatically optimize prompts through examples:

import dspy

# Configure LLM
lm = dspy.Claude(model="claude-3-5-sonnet-20241022")
dspy.settings.configure(lm=lm)

# Define signature
class QuestionAnswering(dspy.Signature):
    """Answer questions with reasoning"""
    question = dspy.InputField()
    reasoning = dspy.OutputField(desc="step-by-step reasoning")
    answer = dspy.OutputField(desc="final answer")

# Create module
qa = dspy.ChainOfThought(QuestionAnswering)

# Example data
examples = [
    dspy.Example(
        question="What is 15% of 240?",
        answer="36"
    ).with_inputs("question"),
    # ... more examples
]
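
# BootstrapFewShot needs a metric; exact_match is not defined above, so here is a
# simple one (DSPy metrics conventionally take an example, a prediction, and an
# optional trace argument).
def exact_match(example, pred, trace=None):
    return example.answer.strip().lower() == pred.answer.strip().lower()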

# Optimize prompts automatically
from dspy.teleprompt import BootstrapFewShot

optimizer = BootstrapFewShot(metric=exact_match)
optimized_qa = optimizer.compile(qa, trainset=examples)

# Use optimized version
result = optimized_qa(question="What is 23% of 450?")
print(result.answer)

Context Window Management

For long chains, manage context strategically:

class ContextManager:
    def __init__(self, max_tokens: int = 100000):
        self.max_tokens = max_tokens
        self.history = []

    def add_exchange(self, prompt: str, response: str):
        self.history.append({"prompt": prompt, "response": response})
        self._prune_if_needed()

    def _prune_if_needed(self):
        total = sum(estimate_tokens(h["prompt"]) + estimate_tokens(h["response"])
                   for h in self.history)

        while total > self.max_tokens and len(self.history) > 1:
            # Keep first (instructions) and last N exchanges
            self.history.pop(1)
            total = sum(estimate_tokens(h["prompt"]) + estimate_tokens(h["response"])
                       for h in self.history)

    def get_context(self) -> str:
        return "nn".join(
            f"User: {h['prompt']}nAssistant: {h['response']}"
            for h in self.history
        )
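
estimate_tokens is assumed above; a rough character-count heuristic (about four characters per token for English text) is usually good enough for pruning decisions:

def estimate_tokens(text: str) -> int:
    """Crude token estimate; swap in a real tokenizer if precision matters."""
    return max(1, len(text) // 4)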

Error Recovery

Build robustness into chains:

import time

def robust_chain(query: str, max_retries: int = 3):
    for attempt in range(max_retries):
        try:
            result = chain_prompts(query)

            # Validate result
            if is_valid(result):
                return result
            else:
                # Refine prompt and retry
                query = f"{query}nnNote: Previous attempt was invalid. Be more specific."

        except Exception as e:
            if attempt == max_retries - 1:
                raise
            time.sleep(2 ** attempt)  # exponential backoff

    return "Failed after retries"

def is_valid(response: str) -> bool:
    # Heuristic checks: explicit refusals and suspiciously short replies
    if "I don't have enough information" in response:
        return False
    if len(response) < 50:  # suspiciously short
        return False
    return True

Monitoring Chain Performance
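
Instrument each step so per-stage latency and failures are visible. The sketch below uses LangSmith-style nested runs; treat the exact create_run/update_run arguments as schematic and check the langsmith SDK (its @traceable decorator is often the simpler route) before relying on them: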

import time
from langsmith import Client

langsmith = Client()

def traced_chain(query: str):
    run_id = langsmith.create_run(name="chained_qa", inputs={"query": query})

    try:
        start = time.time()

        # Step 1
        step1_start = time.time()
        sub_questions = decompose(query)
        langsmith.create_run(
            name="decompose",
            parent_run_id=run_id,
            inputs={"query": query},
            outputs={"sub_questions": sub_questions},
            execution_time=time.time() - step1_start
        )

        # Step 2
        step2_start = time.time()
        answers = [answer(sq) for sq in sub_questions]
        langsmith.create_run(
            name="answer_sub_questions",
            parent_run_id=run_id,
            inputs={"sub_questions": sub_questions},
            outputs={"answers": answers},
            execution_time=time.time() - step2_start
        )

        # Step 3
        step3_start = time.time()
        final = synthesize(query, answers)
        langsmith.create_run(
            name="synthesize",
            parent_run_id=run_id,
            inputs={"query": query, "answers": answers},
            outputs={"final": final},
            execution_time=time.time() - step3_start
        )

        langsmith.update_run(
            run_id,
            outputs={"answer": final},
            execution_time=time.time() - start,
            status="success"
        )

        return final

    except Exception as e:
        langsmith.update_run(run_id, status="error", error=str(e))
        raise

Cost Optimization

Chaining increases API calls. Optimize by:

  • Using smaller models for simple steps (e.g., Claude Haiku for routing)
  • Caching intermediate results
  • Parallelizing independent steps
  • Short-circuiting when confidence is high

def cost_optimized_chain(query: str):
    # Use cheap model to route
    is_complex = classify_complexity(query, model="claude-3-haiku-20240307")

    if not is_complex:
        # Simple query, single shot with cheap model
        return single_prompt(query, model="claude-3-haiku-20240307")
    else:
        # Complex query: run the full chain with the stronger model
        # (assumes a chain_prompts variant that accepts a model parameter)
        return chain_prompts(query, model="claude-3-5-sonnet-20241022")
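
classify_complexity and single_prompt are assumed helpers; a hypothetical router that asks a small model for a YES/NO judgment could look like this:

def classify_complexity(query: str, model: str = "claude-3-haiku-20240307") -> bool:
    """Ask a cheap model whether the query needs multi-step reasoning."""
    response = client.messages.create(
        model=model,
        max_tokens=5,
        messages=[{
            "role": "user",
            "content": (
                "Does answering this question require multi-step reasoning "
                "or multiple lookups? Reply YES or NO only.\n\n"
                f"Question: {query}"
            ),
        }],
    )
    return response.content[0].text.strip().upper().startswith("YES")

def single_prompt(query: str, model: str) -> str:
    """One-shot answer using whichever model the router picked."""
    response = client.messages.create(
        model=model,
        max_tokens=800,
        messages=[{"role": "user", "content": query}],
    )
    return response.content[0].text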