Human-in-the-Loop: Balancing AI Autonomy and Human Control
Fully autonomous AI agents are a fantasy—and a dangerous one. Even the best agents make mistakes, hallucinate, and encounter situations they can't handle.
Human-in-the-Loop (HITL) is the pattern that makes AI agents actually work in production. It's about knowing when to ask for help, when to pause for approval, and when to hand off to a human entirely.
This guide shows you how to build agents that collaborate with humans instead of trying to replace them.
Why Human-in-the-Loop Matters
The Autonomy Paradox
More autonomy sounds better, but:
```
Autonomy vs. Risk

Risk │                        ╱
     │                   ╱
     │              ╱
     │         ╱
     │    ╱
     │╱
     └────────────────────────────
                 Autonomy

Low autonomy  = Safe but slow
High autonomy = Fast but risky
HITL          = Optimal balance for each situation
```
The goal isn't maximum autonomy—it's appropriate autonomy for each situation.
When Agents Need Humans
- High-stakes decisions: Deleting data, sending money, publishing content
- Uncertainty: Low confidence, ambiguous requirements
- Edge cases: Situations not in training data
- Sensitive content: Legal, medical, financial advice
- Learning opportunities: New patterns to incorporate
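These conditions translate directly into a routing check the agent can run before every action. Here is a minimal sketch; the field names, the action-type list, and the 0.75 threshold are illustrative assumptions, not part of any specific framework:

```python
def needs_human(action: dict, confidence: float,
                sensitive_topics: frozenset = frozenset({"legal", "medical", "financial"})) -> str | None:
    """Return a reason for escalating to a human, or None to proceed autonomously."""
    # High-stakes decisions: irreversible or dangerous action types
    if action.get("irreversible") or action.get("type") in {"delete_data", "send_money", "publish"}:
        return "high-stakes action"
    # Uncertainty: low confidence or ambiguous requirements
    if confidence < 0.75:
        return "low confidence"
    # Sensitive content
    if action.get("topic") in sensitive_topics:
        return "sensitive content"
    # Edge cases: situations the agent hasn't seen before
    if action.get("novel"):
        return "edge case"
    return None
```

Returning the reason rather than a bare boolean keeps the escalation message to the human informative.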
HITL Patterns
Pattern 1: Approval Gates
Pause for human approval before critical actions:
```python
from enum import Enum
from dataclasses import dataclass
import time

class ActionRisk(Enum):
    LOW = "low"           # Proceed automatically
    MEDIUM = "medium"     # Log, but proceed
    HIGH = "high"         # Require approval
    CRITICAL = "critical" # Require multi-person approval

@dataclass
class PendingAction:
    action_id: str
    action_type: str
    description: str
    risk_level: ActionRisk
    context: dict
    created_at: float
    approved: bool | None = None
    approved_by: str | None = None

class ApprovalGateAgent:
    def __init__(self, approval_callback):
        self.approval_callback = approval_callback
        self.pending_actions = {}
        self.action_risks = {
            "read_file": ActionRisk.LOW,
            "write_file": ActionRisk.MEDIUM,
            "send_email": ActionRisk.HIGH,
            "delete_data": ActionRisk.CRITICAL,
            "execute_code": ActionRisk.MEDIUM,
            "make_payment": ActionRisk.CRITICAL,
        }

    def execute_action(self, action_type: str, params: dict) -> dict:
        # Unknown action types default to HIGH risk (fail closed)
        risk = self.action_risks.get(action_type, ActionRisk.HIGH)

        if risk == ActionRisk.LOW:
            return self._execute(action_type, params)

        if risk == ActionRisk.MEDIUM:
            self._log_action(action_type, params)
            return self._execute(action_type, params)

        if risk in [ActionRisk.HIGH, ActionRisk.CRITICAL]:
            return self._request_approval(action_type, params, risk)

    def _request_approval(self, action_type: str, params: dict, risk: ActionRisk) -> dict:
        action = PendingAction(
            action_id=f"action_{time.time()}",
            action_type=action_type,
            description=self._describe_action(action_type, params),
            risk_level=risk,
            context=params,
            created_at=time.time()
        )

        self.pending_actions[action.action_id] = action

        # Request approval (async in production)
        approved = self.approval_callback(action)

        if approved:
            action.approved = True
            return self._execute(action_type, params)
        else:
            action.approved = False
            return {"status": "rejected", "action_id": action.action_id}

    def _describe_action(self, action_type: str, params: dict) -> str:
        descriptions = {
            "send_email": f"Send email to {params.get('to')} with subject '{params.get('subject')}'",
            "delete_data": f"Delete {params.get('count', 'unknown')} records from {params.get('table')}",
            "make_payment": f"Transfer ${params.get('amount')} to {params.get('recipient')}",
        }
        return descriptions.get(action_type, f"{action_type}: {params}")

    def _log_action(self, action_type: str, params: dict) -> None:
        # Placeholder: wire this up to your audit log in production
        print(f"[audit] {action_type}: {params}")

    def _execute(self, action_type: str, params: dict) -> dict:
        # Placeholder: dispatch to the real tool implementation
        return {"status": "executed", "action": action_type}


# Usage with CLI approval
def cli_approval(action: PendingAction) -> bool:
    print(f"\n{'='*60}")
    print("🔔 APPROVAL REQUIRED")
    print(f"{'='*60}")
    print(f"Action: {action.action_type}")
    print(f"Risk: {action.risk_level.value.upper()}")
    print(f"Description: {action.description}")
    print(f"\nContext: {action.context}")

    response = input("\nApprove? (yes/no): ").strip().lower()
    return response == "yes"

agent = ApprovalGateAgent(approval_callback=cli_approval)

# Low risk - executes immediately
agent.execute_action("read_file", {"path": "/data/report.csv"})

# High risk - requires approval
agent.execute_action("send_email", {
    "to": "client@example.com",
    "subject": "Contract Update",
    "body": "..."
})
```
Pattern 2: Confidence-Based Escalation
Escalate to humans when confidence is low:
```python
import json
import openai
from dataclasses import dataclass

@dataclass
class AgentResponse:
    answer: str
    confidence: float
    reasoning: str
    needs_human: bool

class ConfidenceAgent:
    def __init__(self, confidence_threshold: float = 0.8):
        self.client = openai.OpenAI()
        self.threshold = confidence_threshold

    def answer(self, question: str) -> AgentResponse:
        # Get answer with confidence score
        response = self.client.chat.completions.create(
            model="gpt-4o",
            messages=[{
                "role": "system",
                "content": """Answer the question and rate your confidence.

Return JSON:
{
    "answer": "your answer",
    "confidence": 0.0-1.0,
    "reasoning": "why this confidence level",
    "uncertain_aspects": ["aspect1", "aspect2"]
}

Be honest about uncertainty. Low confidence is better than wrong confidence."""
            }, {
                "role": "user",
                "content": question
            }],
            response_format={"type": "json_object"}
        )

        data = json.loads(response.choices[0].message.content)

        needs_human = data["confidence"] < self.threshold

        return AgentResponse(
            answer=data["answer"],
            confidence=data["confidence"],
            reasoning=data["reasoning"],
            needs_human=needs_human
        )

    def answer_with_fallback(self, question: str, human_callback) -> str:
        response = self.answer(question)

        if response.needs_human:
            print(f"⚠️ Low confidence ({response.confidence:.0%})")
            print(f"Reason: {response.reasoning}")
            print(f"\nProposed answer: {response.answer}")

            human_input = human_callback(question, response)

            if human_input:
                return human_input

        return response.answer


# Usage
agent = ConfidenceAgent(confidence_threshold=0.75)

def human_review(question: str, response: AgentResponse) -> str:
    print(f"\nQuestion: {question}")
    print(f"Agent's answer: {response.answer}")

    action = input("Accept (a), Modify (m), or Provide new (n)? ").strip().lower()

    if action == "a":
        return response.answer
    elif action == "m":
        return input("Enter modified answer: ")
    elif action == "n":
        return input("Enter your answer: ")

    return response.answer

answer = agent.answer_with_fallback(
    "What's the best database for a real-time analytics system processing 1M events/second?",
    human_callback=human_review
)
```
Pattern 3: Interactive Clarification
Ask humans for clarification when requirements are ambiguous:
```python
import json
import openai

class ClarifyingAgent:
    def __init__(self):
        self.client = openai.OpenAI()
        self.max_clarifications = 3

    def process(self, request: str, clarification_callback) -> str:
        context = {"original_request": request, "clarifications": []}

        for i in range(self.max_clarifications):
            # Check if we need clarification
            analysis = self._analyze_request(request, context)

            if analysis["clear_enough"]:
                break

            # Ask for clarification
            question = analysis["clarification_question"]
            answer = clarification_callback(question)

            context["clarifications"].append({
                "question": question,
                "answer": answer
            })

            # Update request with clarification
            request = self._incorporate_clarification(request, question, answer)

        # Execute with full context
        return self._execute(request, context)

    def _analyze_request(self, request: str, context: dict) -> dict:
        response = self.client.chat.completions.create(
            model="gpt-4o",
            messages=[{
                "role": "system",
                "content": """Analyze if this request is clear enough to execute.

Return JSON:
{
    "clear_enough": true/false,
    "ambiguities": ["ambiguity 1", "ambiguity 2"],
    "clarification_question": "question to ask user (if not clear)",
    "assumptions": ["assumption if we proceeded without clarifying"]
}

Ask for clarification only if the ambiguity could lead to significantly different outcomes."""
            }, {
                "role": "user",
                "content": f"Request: {request}\n\nPrevious clarifications: {context.get('clarifications', [])}"
            }],
            response_format={"type": "json_object"}
        )

        return json.loads(response.choices[0].message.content)

    def _incorporate_clarification(self, request: str, question: str, answer: str) -> str:
        return f"{request}\n\nClarification - Q: {question} A: {answer}"

    def _execute(self, request: str, context: dict) -> str:
        # Placeholder: hand the clarified request to your execution pipeline
        response = self.client.chat.completions.create(
            model="gpt-4o",
            messages=[{"role": "user", "content": request}]
        )
        return response.choices[0].message.content


# Usage
agent = ClarifyingAgent()

def ask_user(question: str) -> str:
    print(f"\n❓ {question}")
    return input("Your answer: ")

result = agent.process(
    "Create a report of our sales data",  # Ambiguous!
    clarification_callback=ask_user
)

# Agent might ask:
# "Which time period should the report cover?"
# "Should the report include all products or specific categories?"
# "Who is the audience - executives or analysts?"
```
Pattern 4: Supervised Learning Loop
Learn from human corrections:
```python
import openai
from dataclasses import dataclass
from datetime import datetime

@dataclass
class Correction:
    original_output: str
    corrected_output: str
    correction_reason: str
    task_type: str
    timestamp: datetime

class LearningAgent:
    def __init__(self):
        self.client = openai.OpenAI()
        self.corrections_db = []  # In production, use a real database

    def process(self, task: str) -> str:
        # Get relevant past corrections
        relevant_corrections = self._find_relevant_corrections(task)

        # Generate with learned context
        response = self.client.chat.completions.create(
            model="gpt-4o",
            messages=[{
                "role": "system",
                "content": f"""Complete the task.

Learn from these past corrections:
{self._format_corrections(relevant_corrections)}

Apply these lessons to avoid similar mistakes."""
            }, {
                "role": "user",
                "content": task
            }]
        )

        return response.choices[0].message.content

    def record_correction(self, original: str, corrected: str, reason: str, task_type: str):
        """Human provides correction - agent learns"""
        correction = Correction(
            original_output=original,
            corrected_output=corrected,
            correction_reason=reason,
            task_type=task_type,
            timestamp=datetime.now()
        )

        self.corrections_db.append(correction)

        # In production: fine-tune or update embeddings
        self._update_knowledge(correction)

    def _update_knowledge(self, correction: Correction) -> None:
        # Placeholder: embed the correction for retrieval, or queue it for fine-tuning
        pass

    def _find_relevant_corrections(self, task: str, limit: int = 5) -> list:
        # In production: semantic search over corrections
        return self.corrections_db[-limit:]

    def _format_corrections(self, corrections: list) -> str:
        if not corrections:
            return "(No relevant past corrections)"

        formatted = []
        for c in corrections:
            formatted.append(f"""
Mistake: {c.original_output[:200]}...
Correction: {c.corrected_output[:200]}...
Reason: {c.correction_reason}
""")
        return "\n---\n".join(formatted)


# Usage
agent = LearningAgent()

# Agent makes a mistake
output = agent.process("Write an email to decline a meeting")
print(output)  # "Dear Sir, I am writing to inform you..."

# Human corrects
agent.record_correction(
    original=output,
    corrected="Hi [Name], Thanks for the invite! Unfortunately, I have a conflict...",
    reason="Too formal. Use casual, friendly tone for internal communications.",
    task_type="email_writing"
)

# Next time, agent applies the lesson
output = agent.process("Write an email to reschedule a call")
# Now uses appropriate casual tone
```
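The `_find_relevant_corrections` stub above simply returns the most recent entries. The "semantic search over corrections" it hints at could look something like this sketch, which assumes OpenAI embeddings and plain cosine similarity; the model name and helper names are assumptions, not part of the original code:

```python
import numpy as np
import openai

def _embed(client: openai.OpenAI, text: str) -> np.ndarray:
    # One embedding per text; cache these in production rather than re-embedding
    resp = client.embeddings.create(model="text-embedding-3-small", input=text)
    return np.array(resp.data[0].embedding)

def find_relevant_corrections(client, corrections: list, task: str, limit: int = 5) -> list:
    """Rank past corrections by cosine similarity to the new task."""
    if not corrections:
        return []
    task_vec = _embed(client, task)
    scored = []
    for c in corrections:
        vec = _embed(client, f"{c.task_type}: {c.correction_reason}")
        sim = float(np.dot(task_vec, vec) / (np.linalg.norm(task_vec) * np.linalg.norm(vec)))
        scored.append((sim, c))
    scored.sort(key=lambda pair: pair[0], reverse=True)
    return [c for _, c in scored[:limit]]
```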
Building a Complete HITL System
Here's a production-ready HITL agent:
```python
from hopx import Sandbox
import openai
import json
from enum import Enum
from dataclasses import dataclass, field
from datetime import datetime
from typing import Callable, Optional
import asyncio

class EscalationType(Enum):
    APPROVAL = "approval"
    CLARIFICATION = "clarification"
    REVIEW = "review"
    HANDOFF = "handoff"

@dataclass
class EscalationRequest:
    id: str
    type: EscalationType
    context: dict
    message: str
    options: list = field(default_factory=list)
    timeout_seconds: int = 300
    created_at: datetime = field(default_factory=datetime.now)

@dataclass
class EscalationResponse:
    approved: bool
    response: str
    responder: str
    timestamp: datetime = field(default_factory=datetime.now)

class HITLAgent:
    def __init__(
        self,
        escalation_handler: Callable[[EscalationRequest], EscalationResponse],
        confidence_threshold: float = 0.8,
        auto_approve_risks: Optional[list] = None
    ):
        self.client = openai.OpenAI()
        self.escalation_handler = escalation_handler
        self.confidence_threshold = confidence_threshold
        self.auto_approve_risks = auto_approve_risks or ["low"]
        self.action_log = []

    async def run(self, task: str) -> dict:
        """Execute task with human-in-the-loop checkpoints"""

        # Step 1: Understand and validate task
        understanding = await self._understand_task(task)

        if understanding["needs_clarification"]:
            clarification = await self._request_clarification(
                task,
                understanding["questions"]
            )
            task = f"{task}\n\nClarifications:\n{clarification}"

        # Step 2: Plan with risk assessment
        plan = await self._create_plan(task)

        # Step 3: Get approval for high-risk steps
        if any(step["risk"] not in self.auto_approve_risks for step in plan["steps"]):
            approved = await self._request_plan_approval(plan)
            if not approved:
                return {"status": "rejected", "reason": "Plan not approved"}

        # Step 4: Execute with checkpoints
        results = []
        for step in plan["steps"]:
            result = await self._execute_step(step)
            results.append(result)

            # Check for issues requiring escalation
            if result.get("needs_review"):
                review = await self._request_review(step, result)
                if not review.approved:
                    return {"status": "stopped", "reason": review.response}

        # Step 5: Final review for high-stakes tasks
        if plan.get("requires_final_review"):
            final_review = await self._request_final_review(task, results)
            if not final_review.approved:
                return {"status": "needs_revision", "feedback": final_review.response}

        return {
            "status": "completed",
            "results": results,
            "plan": plan
        }

    async def _understand_task(self, task: str) -> dict:
        response = self.client.chat.completions.create(
            model="gpt-4o",
            messages=[{
                "role": "system",
                "content": """Analyze this task:
1. Is it clear enough to proceed?
2. What clarifications would help?
3. What's the risk level?
4. What approvals might be needed?

Return JSON:
{
    "clear": true/false,
    "needs_clarification": true/false,
    "questions": ["question1", "question2"],
    "risk_level": "low/medium/high/critical",
    "potential_issues": ["issue1"]
}"""
            }, {
                "role": "user",
                "content": task
            }],
            response_format={"type": "json_object"}
        )
        return json.loads(response.choices[0].message.content)

    async def _request_clarification(self, task: str, questions: list) -> str:
        request = EscalationRequest(
            id=f"clarify_{datetime.now().timestamp()}",
            type=EscalationType.CLARIFICATION,
            context={"task": task},
            message="Please clarify the following:",
            options=questions
        )

        response = self.escalation_handler(request)
        return response.response

    async def _request_plan_approval(self, plan: dict) -> bool:
        high_risk_steps = [s for s in plan["steps"] if s["risk"] not in self.auto_approve_risks]

        request = EscalationRequest(
            id=f"approve_{datetime.now().timestamp()}",
            type=EscalationType.APPROVAL,
            context={"plan": plan},
            message=f"Approve {len(high_risk_steps)} high-risk actions?",
            options=["Approve All", "Reject", "Review Each"]
        )

        response = self.escalation_handler(request)
        return response.approved

    async def _create_plan(self, task: str) -> dict:
        # Placeholder: ask the model for a step-by-step plan with a risk label per step
        return {"steps": [], "requires_final_review": False}

    async def _execute_step(self, step: dict) -> dict:
        """Execute step with monitoring"""

        self.action_log.append({
            "step": step,
            "started_at": datetime.now().isoformat()
        })

        if step.get("requires_code"):
            result = await self._execute_code(step["code"])
        else:
            result = await self._execute_action(step)

        self.action_log[-1]["result"] = result
        self.action_log[-1]["completed_at"] = datetime.now().isoformat()

        # Check if result needs human review
        if result.get("error") or result.get("unexpected"):
            result["needs_review"] = True

        return result

    async def _execute_code(self, code: str) -> dict:
        # Placeholder: run the code in an isolated sandbox (e.g. the imported hopx Sandbox)
        return {"status": "skipped"}

    async def _execute_action(self, step: dict) -> dict:
        # Placeholder: dispatch to the tool that implements this step
        return {"status": "skipped"}

    async def _request_review(self, step: dict, result: dict) -> EscalationResponse:
        request = EscalationRequest(
            id=f"review_{datetime.now().timestamp()}",
            type=EscalationType.REVIEW,
            context={"step": step, "result": result},
            message=f"Step encountered an issue: {result.get('error', 'Unexpected result')}",
            options=["Continue", "Retry", "Abort", "Modify and Continue"]
        )

        return self.escalation_handler(request)

    async def _request_final_review(self, task: str, results: list) -> EscalationResponse:
        request = EscalationRequest(
            id=f"final_{datetime.now().timestamp()}",
            type=EscalationType.REVIEW,
            context={"task": task, "results": results},
            message="Please review the completed task before finalizing.",
            options=["Approve", "Request Changes", "Reject"]
        )

        return self.escalation_handler(request)


# Example: Slack-based escalation handler
class SlackEscalationHandler:
    def __init__(self, channel: str, bot_token: str):
        self.channel = channel
        self.bot_token = bot_token
        self.pending = {}

    def __call__(self, request: EscalationRequest) -> EscalationResponse:
        # Send to Slack
        message = self._format_message(request)
        self._send_slack_message(message)

        # Wait for response (with timeout)
        response = self._wait_for_response(request.id, request.timeout_seconds)

        return response

    def _format_message(self, request: EscalationRequest) -> dict:
        blocks = [
            {
                "type": "header",
                "text": {"type": "plain_text", "text": f"🔔 {request.type.value.upper()} Required"}
            },
            {
                "type": "section",
                "text": {"type": "mrkdwn", "text": request.message}
            },
            {
                "type": "actions",
                "elements": [
                    {"type": "button", "text": {"type": "plain_text", "text": opt}, "action_id": f"opt_{i}"}
                    for i, opt in enumerate(request.options)
                ]
            }
        ]
        return {"channel": self.channel, "blocks": blocks}

    # _send_slack_message and _wait_for_response are left to your Slack integration:
    # post the blocks, then block until a button callback arrives or the timeout expires.


# Usage
async def main():
    handler = SlackEscalationHandler(channel="#ai-approvals", bot_token="xoxb-...")

    agent = HITLAgent(
        escalation_handler=handler,
        confidence_threshold=0.8,
        auto_approve_risks=["low", "medium"]
    )

    result = await agent.run(
        "Analyze our customer data and send a summary report to the executive team"
    )

    print(result)

# asyncio.run(main())
```
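The Slack handler above leaves `_send_slack_message` and `_wait_for_response` unimplemented. One way to fill them in, sketched here as a mixin that assumes the official `slack_sdk` client and a separate interactivity webhook that writes reviewers' button responses into the handler's `pending` dict (that webhook wiring is not shown):

```python
import time
from slack_sdk import WebClient  # assumed dependency

class SlackTransportMixin:
    """Sketch of the transport methods for SlackEscalationHandler.
    Relies on self.bot_token and self.pending from the handler, and on the
    EscalationResponse dataclass defined earlier."""

    def _send_slack_message(self, message: dict) -> None:
        # chat_postMessage accepts a channel and Block Kit blocks
        WebClient(token=self.bot_token).chat_postMessage(
            channel=message["channel"],
            blocks=message["blocks"],
            text="Approval required"  # fallback text for notifications
        )

    def _wait_for_response(self, request_id: str, timeout_seconds: int) -> EscalationResponse:
        # Poll until the interactivity webhook stores a response for this request,
        # defaulting to "not approved" if the timeout expires (fail closed).
        deadline = time.time() + timeout_seconds
        while time.time() < deadline:
            if request_id in self.pending:
                return self.pending.pop(request_id)
            time.sleep(1)
        return EscalationResponse(approved=False, response="Timed out", responder="system")
```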
HITL Interface Patterns
Web-Based Approval Queue
```python
from fastapi import FastAPI, WebSocket
from fastapi.responses import HTMLResponse
import json

app = FastAPI()
approval_queue = []
connected_clients = []

@app.websocket("/ws/approvals")
async def approval_websocket(websocket: WebSocket):
    await websocket.accept()
    connected_clients.append(websocket)

    try:
        while True:
            # Receive approval/rejection from UI
            data = await websocket.receive_json()

            action_id = data["action_id"]
            approved = data["approved"]

            # Process the response
            handle_approval_response(action_id, approved, data.get("comment"))
    finally:
        connected_clients.remove(websocket)

async def request_approval(action: dict) -> bool:
    """Send approval request to all connected clients"""
    approval_queue.append(action)

    for client in connected_clients:
        await client.send_json({
            "type": "approval_request",
            "action": action
        })

    # Wait for response (implement with asyncio.Event)
    response = await wait_for_approval(action["id"])
    return response

@app.get("/approvals")
async def approval_ui():
    return HTMLResponse("""
    <html>
    <head><title>AI Agent Approvals</title></head>
    <body>
        <h1>Pending Approvals</h1>
        <div id="approvals"></div>
        <script>
            const ws = new WebSocket('ws://localhost:8000/ws/approvals');

            ws.onmessage = (event) => {
                const data = JSON.parse(event.data);
                if (data.type === 'approval_request') {
                    showApprovalRequest(data.action);
                }
            };

            function approve(actionId) {
                ws.send(JSON.stringify({
                    action_id: actionId,
                    approved: true
                }));
            }

            function reject(actionId) {
                ws.send(JSON.stringify({
                    action_id: actionId,
                    approved: false,
                    comment: prompt('Reason for rejection:')
                }));
            }
        </script>
    </body>
    </html>
    """)
```
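The snippet leaves `wait_for_approval` and `handle_approval_response` as stubs. Here is a minimal sketch of both using `asyncio.Event`, as the inline comment suggests; the module-level dicts are an assumption, not part of the original:

```python
import asyncio

# Per-action coordination: an Event to wake the waiter, plus the recorded decision
_pending_events: dict[str, asyncio.Event] = {}
_decisions: dict[str, dict] = {}

def handle_approval_response(action_id: str, approved: bool, comment: str | None = None) -> None:
    """Called from the websocket handler when a reviewer clicks approve/reject."""
    _decisions[action_id] = {"approved": approved, "comment": comment}
    event = _pending_events.get(action_id)
    if event:
        event.set()

async def wait_for_approval(action_id: str, timeout: float = 300.0) -> bool:
    """Block the requesting coroutine until a decision arrives or the timeout passes."""
    event = asyncio.Event()
    _pending_events[action_id] = event
    try:
        await asyncio.wait_for(event.wait(), timeout=timeout)
    except asyncio.TimeoutError:
        return False  # fail closed: no decision means no approval
    finally:
        _pending_events.pop(action_id, None)
    return _decisions.pop(action_id, {}).get("approved", False)
```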
Best Practices
1. Default to Asking
```python
# ❌ Optimistic (dangerous)
def execute(self, action):
    return self._do_action(action)

# ✅ Conservative (safe)
def execute(self, action):
    if self._is_safe(action):
        return self._do_action(action)
    else:
        return self._request_approval(action)
```
2. Provide Context
```python
# ❌ Vague approval request
"Approve action?"

# ✅ Rich context
f"""
Action: {action_type}
Target: {target}
Impact: {impact_description}
Risk Level: {risk}
Reversible: {is_reversible}
Similar past actions: {past_examples}

Agent's reasoning: {reasoning}
"""
```
3. Time-Box Decisions
```python
async def request_with_timeout(self, request, timeout=300):
    try:
        response = await asyncio.wait_for(
            self._get_human_response(request),
            timeout=timeout
        )
        return response
    except asyncio.TimeoutError:
        # Default to safe action on timeout
        return self._safe_default(request)
```
4. Learn from Decisions
```python
def record_decision(self, request, response, outcome):
    """Track decisions to improve future automation"""
    self.decisions.append({
        "request": request,
        "response": response,
        "outcome": outcome,
        "timestamp": datetime.now()
    })

    # Analyze patterns
    if self._should_automate(request.type):
        self._add_to_auto_approve(request.type)
```
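`_should_automate` is referenced but not shown above. One possible rule, sketched here against the decision log built by `record_decision`; the sample-size and approval-rate thresholds are arbitrary assumptions:

```python
def _should_automate(self, request_type: str, min_samples: int = 10, threshold: float = 0.95) -> bool:
    """Suggest auto-approval once humans almost always approve this request type and it ends well."""
    history = [d for d in self.decisions if d["request"].type == request_type]
    if len(history) < min_samples:
        return False  # not enough evidence yet
    good = sum(1 for d in history if d["response"].approved and d["outcome"] == "success")
    return good / len(history) >= threshold
```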
When to Use HITL
| Situation | HITL Approach |
|---|---|
| Financial transactions | Approval gate |
| Content publishing | Review before publish |
| Data deletion | Confirmation + undo period |
| Customer communications | Template approval |
| System configuration | Change approval |
| Ambiguous requests | Clarification |
| Low confidence | Escalation |
| First-time actions | Approval, then learn |
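In code, this table can become a small policy map the agent consults before acting. A minimal sketch, with illustrative keys and values rather than a fixed taxonomy:

```python
# Map situations to the HITL mechanism they warrant (illustrative, not exhaustive)
HITL_POLICY = {
    "financial_transaction": "approval_gate",
    "content_publishing": "review_before_publish",
    "data_deletion": "confirmation_with_undo_period",
    "customer_communication": "template_approval",
    "system_configuration": "change_approval",
    "ambiguous_request": "clarification",
    "low_confidence": "escalation",
    "first_time_action": "approval_then_learn",
}

def hitl_mechanism(situation: str) -> str:
    # Unknown situations fail closed: require approval
    return HITL_POLICY.get(situation, "approval_gate")
```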
Conclusion
Human-in-the-Loop isn't about limiting AI—it's about building AI systems that actually work in production:
- Approval gates for high-risk actions
- Confidence-based escalation for uncertainty
- Clarification loops for ambiguous requests
- Learning from corrections to improve over time
Start with conservative settings (more human involvement). Gradually increase autonomy as trust builds. Always have a human escalation path.
The agent that knows when to ask for help outperforms the agent that doesn't. Every time.
Ready to build collaborative human-AI systems? Get started with HopX — sandboxes that provide safe execution while humans review.
Further Reading
- What Is an AI Agent? — Agent fundamentals
- Evaluator-Optimizer Loop — Automated quality with human oversight
- The Planning Pattern — Plans humans can review
- Tool Use Pattern — Tools with approval gates