Back to Blog

HopX for Education: Safe Coding Environments for Students

Use Cases · Alin Dobra · 10 min read

HopX for Education: Safe Coding Environments for Students

Teaching programming requires students to run code. But running untrusted student code on shared infrastructure is a security nightmare. One student's infinite loop shouldn't crash the server. One malicious submission shouldn't access other students' data.

HopX sandboxes provide isolated execution environments perfect for educational platforms—safe, scalable, and easy to integrate.

The Challenge of Educational Code Execution

What can go wrong:

  • Infinite loops consuming all CPU
  • Memory exhaustion (allocating huge arrays)
  • Fork bombs (spawning processes)
  • File system access (reading other students' submissions)
  • Network attacks from your infrastructure
  • Malicious code execution

Traditional solutions and their problems:

  • Docker containers: Weak isolation, escape vulnerabilities
  • Shared servers: No isolation, one crash affects all
  • Local execution only: Can't verify, can't scale
  • Manual review: Doesn't scale, inconsistent

HopX for Education

Each student's code runs in its own micro-VM:

  • Complete isolation from other students
  • Complete isolation from your infrastructure
  • Resource limits (CPU, memory, time)
  • No network access by default
  • Instant cleanup after execution

Building a Code Execution Platform

Basic Code Runner

python
1
from hopx import Sandbox
2
from dataclasses import dataclass
3
from enum import Enum
4
 
5
class Language(Enum):
    """Programming languages a submission can be written in.

    Each member's value is the lowercase string identifier of the language,
    so a member can be looked up from that string, e.g. ``Language("cpp")``.
    """

    PYTHON = "python"
    JAVASCRIPT = "javascript"
    JAVA = "java"
    CPP = "cpp"
10
 
11
@dataclass
class ExecutionResult:
    """Outcome of a single execution of a piece of code."""

    # True when the execution is considered successful.
    success: bool
    # Text captured from the program's standard output.
    stdout: str
    # Text captured from the program's standard error.
    stderr: str
    # Exit status of the executed command.
    exit_code: int
    # Duration of the execution, in seconds.
    execution_time: float
    # Memory consumed by the run; 0 unless a caller supplies a value.
    memory_used: int = 0
19
 
20
class CodeRunner:
    """Run one source file inside a freshly created HopX sandbox.

    ``LANGUAGE_CONFIG`` maps every supported :class:`Language` to:

    * ``template`` - the sandbox template to create,
    * ``file`` - the file name the submission is written to under ``/app``,
    * ``command`` - the shell command that builds and/or runs that file.
    """

    LANGUAGE_CONFIG = {
        Language.PYTHON: {
            "template": "code-interpreter",
            "file": "solution.py",
            "command": "python solution.py",
        },
        Language.JAVASCRIPT: {
            "template": "node",
            "file": "solution.js",
            "command": "node solution.js",
        },
        Language.JAVA: {
            "template": "base",
            "file": "Solution.java",
            "command": "javac Solution.java && java Solution",
        },
        Language.CPP: {
            "template": "base",
            "file": "solution.cpp",
            "command": "g++ -o solution solution.cpp && ./solution",
        },
    }

    def run(
        self,
        code: str,
        language: Language,
        stdin: str = "",
        timeout: int = 30,
        memory_limit: str = "256M"
    ) -> ExecutionResult:
        """Execute ``code`` in a new sandbox and report what happened.

        Args:
            code: Source text of the submission.
            language: Key into ``LANGUAGE_CONFIG``.
            stdin: Text to feed to the program. When empty, the program's
                standard input is not redirected.
            timeout: Limit passed to the sandbox command, in seconds.
            memory_limit: Memory cap passed to ``Sandbox.create``.

        Returns:
            An ``ExecutionResult``. ``success`` is True only when the command
            exits with status 0. On timeout the result has ``exit_code=-1``,
            empty ``stdout`` and ``execution_time`` equal to ``timeout``.

        Raises:
            KeyError: If ``language`` has no entry in ``LANGUAGE_CONFIG``.
        """
        # Imported here so the method does not depend on a module-level import.
        import time

        config = self.LANGUAGE_CONFIG[language]
        sandbox = Sandbox.create(
            template=config["template"],
            memory_limit=memory_limit,
        )

        # perf_counter() is monotonic; time.time() can jump when the system
        # clock is adjusted and would then report a wrong (even negative)
        # duration.
        start = time.perf_counter()

        try:
            sandbox.files.write(f"/app/{config['file']}", code)

            command = f"cd /app && {config['command']}"
            if stdin:
                # Input goes through a file so it never needs shell quoting.
                sandbox.files.write("/app/input.txt", stdin)
                command += " < input.txt"

            result = sandbox.commands.run(command, timeout=timeout)
        except TimeoutError:
            # NOTE(review): assumes the SDK signals a timeout with the
            # built-in TimeoutError - confirm against the HopX SDK.
            return ExecutionResult(
                success=False,
                stdout="",
                stderr="Execution timed out",
                exit_code=-1,
                execution_time=float(timeout),
            )
        else:
            return ExecutionResult(
                success=result.exit_code == 0,
                stdout=result.stdout,
                stderr=result.stderr,
                exit_code=result.exit_code,
                execution_time=time.perf_counter() - start,
            )
        finally:
            # Always discard the sandbox, whatever happened above.
            sandbox.kill()
98
 
99
 
100
# Usage: run a one-line Python program and report its output and duration.
runner = CodeRunner()
result = runner.run(code='print("Hello, World!")', language=Language.PYTHON)

print(f"Output: {result.stdout}")
print(f"Time: {result.execution_time:.2f}s")
109
 

Auto-Grading System

python
1
from dataclasses import dataclass
2
from typing import List, Optional
3
import json
4
 
5
@dataclass
class TestCase:
    """One input/expected-output pair used to grade a submission."""

    # Text fed to the program on standard input.
    input: str
    # Output the program must print; compared after stripping whitespace.
    expected_output: str
    # Human-readable label for the test.
    name: str = ""
    # Points awarded when the test passes.
    points: int = 1
    # Hidden tests never reveal the program's output or error text.
    hidden: bool = False


@dataclass
class TestResult:
    """Outcome of running a submission against one :class:`TestCase`."""

    passed: bool
    test_case: TestCase
    # Stripped stdout of the run, or "[hidden]" for hidden tests.
    actual_output: str
    # stderr of the run (None when empty), or "[hidden]" for hidden tests.
    error: Optional[str] = None


@dataclass
class GradeResult:
    """Aggregate score of a submission over all of its test cases."""

    total_points: int
    earned_points: int
    # earned_points / total_points * 100, or 0.0 when there are no points.
    percentage: float
    test_results: List[TestResult]
    passed_count: int
    failed_count: int


class AutoGrader:
    """Grade submissions by running them once per test case."""

    def __init__(self):
        self.runner = CodeRunner()

    def grade(
        self,
        code: str,
        language: "Language",
        test_cases: List[TestCase],
        timeout_per_test: int = 5
    ) -> GradeResult:
        """Grade a submission against test cases.

        Every test case triggers one ``self.runner.run`` call with the test's
        input as stdin. A test passes only when the run itself succeeded and
        its stripped stdout equals the stripped expected output.

        Args:
            code: Source text of the submission.
            language: Language of the submission, forwarded to the runner.
            test_cases: Tests to run, in order.
            timeout_per_test: Timeout forwarded to the runner for each test.

        Returns:
            A ``GradeResult`` with one ``TestResult`` per test case, in the
            same order as ``test_cases``.
        """
        results: List[TestResult] = []
        earned = 0

        for test in test_cases:
            execution = self.runner.run(
                code=code,
                language=language,
                stdin=test.input,
                timeout=timeout_per_test,
            )

            actual = execution.stdout.strip()
            expected = test.expected_output.strip()
            # Require a successful run: otherwise a crash or timeout whose
            # (possibly empty) stdout happens to match would earn the points.
            passed = bool(execution.success) and actual == expected
            if passed:
                earned += test.points

            error = execution.stderr or None
            if test.hidden:
                # stderr can echo the hidden input (e.g. in a traceback), so
                # it is masked together with stdout.
                actual = "[hidden]"
                if error is not None:
                    error = "[hidden]"

            results.append(TestResult(
                passed=passed,
                test_case=test,
                actual_output=actual,
                error=error,
            ))

        total = sum(tc.points for tc in test_cases)
        passed_count = sum(1 for r in results if r.passed)

        return GradeResult(
            total_points=total,
            earned_points=earned,
            percentage=(earned / total * 100) if total > 0 else 0.0,
            test_results=results,
            passed_count=passed_count,
            failed_count=len(results) - passed_count,
        )
78
 
79
 
80
# Example: grading a "Sum of Two Numbers" problem.
grader = AutoGrader()

# Three visible tests plus one hidden test; each is worth the default 1 point.
test_cases = [
    TestCase(input="2 3", expected_output="5", name="Basic addition"),
    TestCase(input="0 0", expected_output="0", name="Zero case"),
    TestCase(input="-5 10", expected_output="5", name="Negative numbers"),
    TestCase(input="999999 1", expected_output="1000000", name="Large numbers", hidden=True),
]

student_code = """
a, b = map(int, input().split())
print(a + b)
"""

result = grader.grade(code=student_code, language=Language.PYTHON, test_cases=test_cases)

print(f"Score: {result.earned_points}/{result.total_points} ({result.percentage:.1f}%)")
for tr in result.test_results:
    status = "✓" if tr.passed else "✗"
    # Hidden tests are left out of the per-test report.
    if tr.test_case.hidden:
        continue
    print(f"  {status} {tr.test_case.name}: {tr.actual_output}")
106
 

Interactive Coding Environment

Build a web-based coding environment:

python
1
from fastapi import FastAPI, WebSocket
2
from pydantic import BaseModel
3
import asyncio
4
import json
5
 
6
app = FastAPI()
7
 
8
class CodeRequest(BaseModel):
    """Request body for a code execution request."""

    # Source text to execute.
    code: str
    # Language identifier as a plain string.
    language: str
    # Optional text for the program's standard input; empty by default.
    stdin: str = ""
12
 
13
@app.websocket("/ws/repl")
async def interactive_repl(websocket: WebSocket):
    """Serve Python snippets sent over a WebSocket from a dedicated sandbox.

    Protocol (JSON messages):

    * ``{"type": "execute", "code": ...}`` runs the code with ``python -c``
      and replies with a ``"result"`` message carrying stdout, stderr and a
      success flag. Each message starts a new interpreter process, so
      variables do not carry over between messages.
    * ``{"type": "reset"}`` replaces the sandbox with a fresh one and replies
      with ``"reset_complete"``.

    The sandbox is killed when the connection ends for any reason.
    """
    # Function-scope imports keep this handler self-contained.
    import shlex

    from fastapi import WebSocketDisconnect

    await websocket.accept()

    # The sandbox calls are blocking, so they run in a worker thread to keep
    # the event loop free for other connections.
    sandbox = await asyncio.to_thread(Sandbox.create, template="code-interpreter")

    try:
        await websocket.send_json({"type": "ready", "message": "Python REPL ready"})

        while True:
            data = await websocket.receive_json()

            if data["type"] == "execute":
                # Quote the code as one shell word; splicing it between
                # double quotes breaks on (or is hijacked by) any ", $ or
                # backtick in the student's code.
                command = f"python -c {shlex.quote(data['code'])}"
                result = await asyncio.to_thread(sandbox.commands.run, command)

                await websocket.send_json({
                    "type": "result",
                    "stdout": result.stdout,
                    "stderr": result.stderr,
                    "success": result.exit_code == 0,
                })

            elif data["type"] == "reset":
                await asyncio.to_thread(sandbox.kill)
                sandbox = await asyncio.to_thread(
                    Sandbox.create, template="code-interpreter"
                )
                await websocket.send_json({"type": "reset_complete"})

    except WebSocketDisconnect:
        # The client closed the connection; there is nobody left to notify.
        pass

    except Exception as e:
        await websocket.send_json({"type": "error", "message": str(e)})

    finally:
        sandbox.kill()
51
 
52
 
53
@app.post("/api/run")
async def run_code(request: CodeRequest):
    """Run one submission and return its output.

    Responds with HTTP 400 when ``request.language`` is not a value of
    ``Language``. Otherwise returns a dict with the run's ``stdout``,
    ``stderr``, ``success`` flag and ``execution_time``.
    """
    # Function-scope import keeps this handler self-contained.
    from fastapi import HTTPException

    try:
        language = Language(request.language)
    except ValueError:
        # An unknown language is a client error, not a server failure.
        raise HTTPException(
            status_code=400,
            detail=f"Unsupported language: {request.language}",
        ) from None

    runner = CodeRunner()
    # runner.run blocks until the sandbox finishes; run it in a worker thread
    # so the event loop keeps serving other requests.
    result = await asyncio.to_thread(
        runner.run,
        code=request.code,
        language=language,
        stdin=request.stdin,
    )

    return {
        "stdout": result.stdout,
        "stderr": result.stderr,
        "success": result.success,
        "execution_time": result.execution_time,
    }
71
 

Course Content Integration

Embeddable Code Exercises

python
1
@dataclass
class CodingExercise:
    """A self-contained programming exercise."""

    # Identifier of the exercise.
    id: str
    title: str
    # Problem statement.
    description: str
    # Code the student starts from.
    starter_code: str
    # Reference solution.
    solution: str
    # Tests a submission is graded against.
    test_cases: List[TestCase]
    # Hints, in the order they should be offered.
    hints: List[str]
    # Language of the exercise; Python unless stated otherwise.
    language: Language = Language.PYTHON
11
 
12
class ExerciseRunner:
    """Grade a student's attempt at an exercise and summarise the outcome."""

    def __init__(self):
        self.grader = AutoGrader()

    def check_solution(self, exercise: CodingExercise, student_code: str) -> dict:
        """Grade ``student_code`` against the exercise's test cases.

        Returns a dict with:

        * ``passed`` - True when the score is exactly 100 percent,
        * ``score`` - the percentage of points earned,
        * ``passed_tests`` / ``total_tests`` - test counts,
        * ``feedback`` - one message per failed test that is not hidden,
        * ``hints`` - the first hint when the score is below 50 percent and
          the exercise has hints, otherwise an empty list.
        """
        grade = self.grader.grade(
            code=student_code,
            language=exercise.language,
            test_cases=exercise.test_cases,
        )

        # Only visible failures are explained; hidden tests stay opaque.
        feedback = [
            f"Test '{tr.test_case.name}' failed: "
            f"expected '{tr.test_case.expected_output}', "
            f"got '{tr.actual_output}'"
            for tr in grade.test_results
            if not (tr.passed or tr.test_case.hidden)
        ]

        struggling = grade.percentage < 50
        if struggling and len(exercise.hints) > 0:
            hints_to_show = exercise.hints[:1]
        else:
            hints_to_show = []

        return {
            "passed": grade.percentage == 100,
            "score": grade.percentage,
            "passed_tests": grade.passed_count,
            "total_tests": len(grade.test_results),
            "feedback": feedback,
            "hints": hints_to_show,
        }
48
 
49
 
50
# Example exercise: iterative Fibonacci, graded through stdin/stdout.
# The last test case is hidden from the student.
exercise = CodingExercise(
    id="fibonacci-001",
    title="Fibonacci Sequence",
    description="""
Write a function `fibonacci(n)` that returns the nth Fibonacci number.
- fibonacci(0) = 0
- fibonacci(1) = 1
- fibonacci(n) = fibonacci(n-1) + fibonacci(n-2) for n > 1
    """,
    starter_code="""
def fibonacci(n):
    # Your code here
    pass

# Read input and print result
n = int(input())
print(fibonacci(n))
    """,
    solution="""
def fibonacci(n):
    if n <= 1:
        return n
    a, b = 0, 1
    for _ in range(n - 1):
        a, b = b, a + b
    return b

n = int(input())
print(fibonacci(n))
    """,
    test_cases=[
        TestCase(input="0", expected_output="0", name="Base case 0"),
        TestCase(input="1", expected_output="1", name="Base case 1"),
        TestCase(input="5", expected_output="5", name="Small number"),
        TestCase(input="10", expected_output="55", name="Medium number"),
        TestCase(input="20", expected_output="6765", name="Larger number", hidden=True),
    ],
    hints=[
        "Remember the base cases: fibonacci(0) = 0 and fibonacci(1) = 1",
        "Try using iteration instead of recursion for better performance",
    ],
)
93
 

Progress Tracking

python
1
from datetime import datetime
2
from typing import Dict, Any
3
 
4
class StudentProgress:
    """In-memory record of one student's submissions and completions."""

    def __init__(self, student_id: str):
        self.student_id = student_id
        # One dict per submission, in the order they were recorded.
        self.submissions: List[Dict[str, Any]] = []
        # Ids of exercises with at least one passing submission.
        self.completed_exercises: set = set()

    def record_submission(
        self,
        exercise_id: str,
        code: str,
        result: dict
    ):
        """Record a submission attempt.

        Args:
            exercise_id: Exercise the submission belongs to.
            code: Submitted source text.
            result: Grading result; its ``"passed"`` entry decides whether
                the exercise is marked as completed.
        """
        self.submissions.append({
            "exercise_id": exercise_id,
            "code": code,
            "result": result,
            "timestamp": datetime.now().isoformat(),
        })

        if result["passed"]:
            self.completed_exercises.add(exercise_id)

    def get_stats(self) -> dict:
        """Summarise the student's activity.

        Returns a dict with the submission count, the number of passing
        submissions, their ratio (``success_rate``, 0 with no submissions),
        the number of completed exercises, and the average number of
        submissions per exercise the student has attempted (0 with no
        submissions).
        """
        total_submissions = len(self.submissions)
        successful = sum(1 for s in self.submissions if s["result"]["passed"])
        # Average over every exercise that was attempted. Dividing by the
        # completed ones inflates the figure and reports 0 for a student who
        # has only failed attempts so far.
        attempted = {s["exercise_id"] for s in self.submissions}

        return {
            "total_submissions": total_submissions,
            "successful_submissions": successful,
            "success_rate": successful / total_submissions if total_submissions > 0 else 0,
            "completed_exercises": len(self.completed_exercises),
            "avg_attempts_per_exercise": total_submissions / len(attempted) if attempted else 0,
        }
39
 

Classroom Features

Live Code Comparison

Let instructors see every student's code in real time:

python
1
class ClassroomSession:
    """Live session in which a class works on one shared exercise."""

    def __init__(self, session_id: str, exercise: CodingExercise):
        self.session_id = session_id
        self.exercise = exercise
        # Latest code per student id.
        self.student_code: Dict[str, str] = {}
        # Latest grading result per student id.
        self.student_results: Dict[str, dict] = {}

    def update_code(self, student_id: str, code: str):
        """Store the student's current code."""
        self.student_code[student_id] = code

    def submit(self, student_id: str, code: str) -> dict:
        """Grade ``code`` for the session's exercise and store the result."""
        outcome = ExerciseRunner().check_solution(self.exercise, code)
        self.student_results[student_id] = outcome
        return outcome

    def get_class_progress(self) -> dict:
        """Summarise the results of every student who has submitted.

        Returns ``{"participants": 0}`` when nobody has submitted yet.
        Otherwise the dict holds the participant count, the number of
        passing students, the completion rate in percent, the mean score and
        the ids of students scoring below 50.
        """
        results = self.student_results
        if not results:
            return {"participants": 0}

        participants = len(results)
        completed = len([r for r in results.values() if r["passed"]])
        score_sum = sum(r["score"] for r in results.values())
        struggling = [sid for sid, r in results.items() if r["score"] < 50]

        return {
            "participants": participants,
            "completed": completed,
            "completion_rate": completed / participants * 100,
            "average_score": score_sum / participants,
            "struggling": struggling,
        }
38
 

Plagiarism Detection

Basic code similarity checking:

python
1
import difflib
2
 
3
def calculate_similarity(code1: str, code2: str) -> float:
    """Return a similarity ratio between two code submissions.

    Both inputs are normalized first: every line is stripped of surrounding
    whitespace, and blank lines and lines that start with ``#`` are dropped.
    The ratio of ``difflib.SequenceMatcher`` over the normalized texts is
    returned; identical normalized texts give 1.0.
    """
    def normalize(code):
        kept = []
        for raw_line in code.strip().split('\n'):
            stripped = raw_line.strip()
            # Skip blank lines and whole-line comments.
            if stripped and not stripped.startswith('#'):
                kept.append(stripped)
        return '\n'.join(kept)

    matcher = difflib.SequenceMatcher(None, normalize(code1), normalize(code2))
    return matcher.ratio()
16
 
17
 
18
def check_plagiarism(submissions: Dict[str, str], threshold: float = 0.8) -> List[tuple]:
    """Flag pairs of submissions whose similarity reaches ``threshold``.

    Every unordered pair of students is compared once. The result is a list
    of ``(student_a, student_b, similarity)`` tuples, most similar first.
    """
    names = list(submissions.keys())

    # Pair each student with every student that comes after them.
    pairs = [
        (first, second)
        for position, first in enumerate(names)
        for second in names[position + 1:]
    ]

    flagged = []
    for first, second in pairs:
        score = calculate_similarity(submissions[first], submissions[second])
        if score >= threshold:
            flagged.append((first, second, score))

    flagged.sort(key=lambda entry: entry[2], reverse=True)
    return flagged
30
 
31
 
32
# Usage: three toy submissions checked against the default threshold.
submissions = {
    "student1": "def add(a, b):\n    return a + b",
    "student2": "def add(a, b):\n    return a + b  # sum",
    "student3": "def sum_nums(x, y):\n    result = x + y\n    return result",
}

flagged = check_plagiarism(submissions)
for s1, s2, sim in flagged:
    print(f"Possible plagiarism: {s1} and {s2} ({sim*100:.1f}% similar)")
42
 

Security Best Practices

1. Strict Resource Limits

python
1
# Cap the resources a single submission can use.
sandbox = Sandbox.create(
    template="code-interpreter",
    memory_limit="256M",  # maximum memory
    cpu_limit=0.5,  # half a CPU
    disk_limit="100M",  # maximum disk usage
)

# Bound the running time as well: give up after 10 seconds.
result = sandbox.commands.run(command, timeout=10)
10
 

2. Input Validation

python
1
def validate_code(code: str, language: "Language") -> tuple[bool, str]:
    """Run a cheap pre-flight check on a submission before executing it.

    This is only a courtesy filter. Plain substring matching is easy to
    evade and can also reject harmless code (for example a comment that
    mentions ``import os``), so it must never be the only protection.

    Args:
        code: Source text of the submission.
        language: Language of the submission. Currently unused; the same
            checks are applied to every language.

    Returns:
        ``(True, "OK")`` when the code is accepted, otherwise ``(False,
        reason)`` with a short explanation.
    """
    # The limit is 50 KB, so measure encoded bytes: len(code) counts
    # characters and would let non-ASCII text through at several times the
    # intended size.
    if len(code.encode("utf-8", "surrogatepass")) > 50000:
        return False, "Code too long"

    dangerous = (
        "import os", "import subprocess", "import socket",
        "open('/etc", "__import__", "eval(", "exec(",
        "system(", "popen(",
    )

    # Report the first forbidden pattern, in the order listed above.
    for pattern in dangerous:
        if pattern in code:
            return False, f"Forbidden pattern: {pattern}"

    return True, "OK"
20
 

3. Network Isolation

python
1
# Sandboxes have no network access by default. This prevents:
# - Downloading malicious code
# - Attacking external services
# - Data exfiltration

# Passing network=False states the intent explicitly.
sandbox = Sandbox.create(template="code-interpreter", network=False)
11
 

4. File System Protection

python
1
# Each sandbox has an isolated filesystem. Students can't:
# - Read other students' code
# - Access system files
# - Persist data between runs

# Optional: provide data files that the submission may read but not modify.
sandbox.files.write(
    "/data/dataset.csv",
    data,
    readonly=True,
)
9
 

LMS Integration

API for Learning Management Systems

python
1
from fastapi import FastAPI, Header, HTTPException
2
import hmac
3
import hashlib
4
 
5
app = FastAPI()
6
 
7
def verify_lms_signature(payload: bytes, signature: str, secret: str) -> bool:
    """Check an LMS webhook signature.

    The expected signature is the hex-encoded HMAC-SHA256 of ``payload``
    keyed with ``secret``.

    Args:
        payload: Raw request body that was signed.
        signature: Signature supplied by the caller. A missing (``None``),
            empty or non-string value is treated as invalid.
        secret: Shared secret used as the HMAC key.

    Returns:
        True when ``signature`` matches the expected value, else False.
    """
    if not isinstance(signature, str) or not signature:
        return False

    expected = hmac.new(secret.encode(), payload, hashlib.sha256).hexdigest()
    # Compare as bytes: compare_digest() raises TypeError for str arguments
    # containing non-ASCII characters, which a caller-controlled header can
    # contain.
    return hmac.compare_digest(
        signature.encode("utf-8", "surrogatepass"),
        expected.encode("ascii"),
    )
11
 
12
 
13
@app.post("/lms/submit")
async def lms_submission(
    student_id: str,
    assignment_id: str,
    code: str,
    x_lms_signature: str = Header(None)
):
    """Grade a submission sent by the LMS and return the grade.

    Responds with HTTP 401 when the ``X-LMS-Signature`` header is missing.
    Otherwise the assignment is looked up with ``get_assignment``, the code
    is graded against its test cases, and a dict is returned with the two
    ids, the score, the pass/fail decision and the generated feedback.
    """
    # An unsigned request can never be authentic, so reject it outright.
    if not x_lms_signature:
        raise HTTPException(status_code=401, detail="Missing LMS signature")
    # TODO: also check the value with verify_lms_signature(). That needs the
    # raw request body and the shared LMS secret, which this handler does
    # not have access to yet.

    # Get assignment config
    assignment = get_assignment(assignment_id)

    # Grading blocks on sandbox runs; do it in a worker thread so the event
    # loop stays responsive.
    grader = AutoGrader()
    result = await asyncio.to_thread(
        grader.grade,
        code=code,
        language=assignment.language,
        test_cases=assignment.test_cases,
    )

    # Return grade to LMS
    return {
        "student_id": student_id,
        "assignment_id": assignment_id,
        "score": result.percentage,
        "passed": result.percentage >= assignment.passing_score,
        "feedback": generate_feedback(result),
    }
44
 

Scaling Considerations

Handling Many Simultaneous Submissions

python
1
import asyncio
2
from concurrent.futures import ThreadPoolExecutor
3
 
4
class ScalableGrader:
    """Grade many submissions concurrently on a thread pool.

    At most ``max_concurrent`` submissions are graded at the same time;
    further ones wait in the executor's queue.
    """

    def __init__(self, max_concurrent: int = 50):
        self.executor = ThreadPoolExecutor(max_workers=max_concurrent)
        self.grader = AutoGrader()

    async def grade_batch(
        self,
        submissions: List[tuple[str, str, Language, List[TestCase]]]
    ) -> List[tuple[str, GradeResult]]:
        """Grade multiple submissions concurrently.

        Args:
            submissions: ``(student_id, code, language, test_cases)`` tuples.

        Returns:
            ``(student_id, GradeResult)`` pairs in the same order as
            ``submissions``.
        """
        # get_running_loop() is the supported way to obtain the loop from
        # inside a coroutine; get_event_loop() is deprecated for this use.
        loop = asyncio.get_running_loop()

        async def grade_one(student_id, code, language, tests):
            # The blocking grade() call runs on the pool, not the event loop.
            result = await loop.run_in_executor(
                self.executor,
                self.grader.grade,
                code, language, tests
            )
            return (student_id, result)

        tasks = [
            grade_one(sid, code, lang, tests)
            for sid, code, lang, tests in submissions
        ]

        return await asyncio.gather(*tasks)
31
 
32
 
33
# Usage
async def main():
    """Grade 500 copies of the example submission concurrently."""
    grader = ScalableGrader(max_concurrent=100)

    # Grade 500 submissions. `student_code` and `test_cases` are the example
    # submission and tests defined earlier in this file.
    submissions = [
        (f"student_{i}", student_code, Language.PYTHON, test_cases)
        for i in range(500)
    ]

    return await grader.grade_batch(submissions)


# `await` is only valid inside a coroutine, so the batch is driven through
# asyncio.run() instead of a bare top-level await.
results = asyncio.run(main())
43
 

Conclusion

HopX sandboxes enable educational platforms to:

  • Run untrusted code safely - Complete isolation per student
  • Scale to thousands - Parallel execution without infrastructure concerns
  • Auto-grade reliably - Consistent environments for fair grading
  • Protect everyone - No student can affect another

Build the next generation of coding education with secure, scalable execution.

Resources