Secure File Operations in AI Sandboxes
AI agents often need to work with files—processing uploads, generating documents, analyzing data. Doing this securely requires careful handling. HopX sandboxes provide isolated file systems where agents can safely manipulate files without affecting your host system.
This guide covers everything you need to know about file operations in sandboxes.
File System Overview
Each HopX sandbox has its own isolated filesystem:
| 1 | / |
| 2 | ├── app/ # Default working directory |
| 3 | ├── tmp/ # Temporary files |
| 4 | ├── home/ # User home directory |
| 5 | ├── data/ # Data storage |
| 6 | └── ... # Standard Linux directories |
| 7 | |
Files in a sandbox:
- Are completely isolated from your host
- Persist only for the sandbox lifetime
- Can be uploaded to and downloaded from
- Support all standard file operations
Basic File Operations
Writing Files
Write content to a file in the sandbox:
| 1 | from hopx import Sandbox |
| 2 | |
| 3 | sandbox = Sandbox.create(template="code-interpreter") |
| 4 | |
| 5 | # Write a text file |
| 6 | sandbox.files.write("/app/data.txt", "Hello, World!") |
| 7 | |
| 8 | # Write binary content |
| 9 | with open("local_image.png", "rb") as f: |
| 10 | binary_data = f.read() |
| 11 | sandbox.files.write("/app/image.png", binary_data) |
| 12 | |
| 13 | # Write Python code |
| 14 | code = ''' |
| 15 | import pandas as pd |
| 16 | df = pd.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}) |
| 17 | print(df.to_string()) |
| 18 | ''' |
| 19 | sandbox.files.write("/app/analysis.py", code) |
| 20 | |
Reading Files
Read files from the sandbox:
| 1 | from hopx import Sandbox |
| 2 | |
| 3 | sandbox = Sandbox.create(template="code-interpreter") |
| 4 | |
| 5 | # Execute code that creates a file |
| 6 | sandbox.commands.run("echo 'Generated content' > /app/output.txt") |
| 7 | |
| 8 | # Read the file |
| 9 | content = sandbox.files.read("/app/output.txt") |
| 10 | print(content) # b'Generated content\n' |
| 11 | |
| 12 | # Decode text files |
| 13 | text = sandbox.files.read("/app/output.txt").decode('utf-8') |
| 14 | print(text) # 'Generated content\n' |
| 15 | |
Listing Files
List directory contents:
| 1 | from hopx import Sandbox |
| 2 | |
| 3 | sandbox = Sandbox.create(template="code-interpreter") |
| 4 | |
| 5 | # Create some files |
| 6 | sandbox.commands.run(""" |
| 7 | mkdir -p /app/data |
| 8 | echo 'file1' > /app/data/file1.txt |
| 9 | echo 'file2' > /app/data/file2.txt |
| 10 | echo 'file3' > /app/data/file3.csv |
| 11 | """) |
| 12 | |
| 13 | # List directory |
| 14 | files = sandbox.files.list("/app/data") |
| 15 | for f in files: |
| 16 | print(f"{f.name} - {f.size} bytes - {'dir' if f.is_dir else 'file'}") |
| 17 | |
Checking File Existence
| 1 | from hopx import Sandbox |
| 2 | |
| 3 | sandbox = Sandbox.create(template="code-interpreter") |
| 4 | |
| 5 | # Check if file exists |
| 6 | exists = sandbox.files.exists("/app/myfile.txt") |
| 7 | print(f"File exists: {exists}") |
| 8 | |
| 9 | # Check via command (alternative) |
| 10 | result = sandbox.commands.run("test -f /app/myfile.txt && echo 'yes' || echo 'no'") |
| 11 | print(result.stdout) |
| 12 | |
Deleting Files
| 1 | from hopx import Sandbox |
| 2 | |
| 3 | sandbox = Sandbox.create(template="code-interpreter") |
| 4 | |
| 5 | # Create and delete a file |
| 6 | sandbox.files.write("/app/temp.txt", "temporary content") |
| 7 | sandbox.files.delete("/app/temp.txt") |
| 8 | |
| 9 | # Delete via command (for complex patterns) |
| 10 | sandbox.commands.run("rm -rf /app/cache/*") |
| 11 | |
Uploading Files to Sandboxes
Single File Upload
| 1 | from hopx import Sandbox |
| 2 | |
| 3 | sandbox = Sandbox.create(template="code-interpreter") |
| 4 | |
| 5 | # Upload from local path |
| 6 | with open("dataset.csv", "rb") as f: |
| 7 | sandbox.files.write("/app/dataset.csv", f.read()) |
| 8 | |
| 9 | # Verify upload |
| 10 | result = sandbox.commands.run("head -5 /app/dataset.csv") |
| 11 | print(result.stdout) |
| 12 | |
Multiple File Upload
| 1 | from hopx import Sandbox |
| 2 | import os |
| 3 | |
| 4 | sandbox = Sandbox.create(template="code-interpreter") |
| 5 | |
| 6 | # Upload entire directory |
| 7 | local_dir = "./project_files" |
| 8 | for root, dirs, files in os.walk(local_dir): |
| 9 | for filename in files: |
| 10 | local_path = os.path.join(root, filename) |
| 11 | # Preserve directory structure |
| 12 | relative_path = os.path.relpath(local_path, local_dir) |
| 13 | sandbox_path = f"/app/{relative_path}" |
| 14 | |
| 15 | with open(local_path, "rb") as f: |
| 16 | sandbox.files.write(sandbox_path, f.read()) |
| 17 | print(f"Uploaded: {relative_path}") |
| 18 | |
Streaming Large Files
For large files, stream content instead of loading entirely into memory:
| 1 | from hopx import Sandbox |
| 2 | |
| 3 | sandbox = Sandbox.create(template="code-interpreter") |
| 4 | |
| 5 | def upload_large_file(sandbox, local_path, sandbox_path, chunk_size=1024*1024): |
| 6 | """Upload file in chunks""" |
| 7 | with open(local_path, "rb") as f: |
| 8 | # First chunk creates the file |
| 9 | chunk = f.read(chunk_size) |
| 10 | sandbox.files.write(sandbox_path, chunk) |
| 11 | |
| 12 | # Subsequent chunks append |
| 13 | while True: |
| 14 | chunk = f.read(chunk_size) |
| 15 | if not chunk: |
| 16 | break |
| 17 | # Append mode via command |
| 18 | sandbox.commands.run( |
| 19 | f"cat >> {sandbox_path}", |
| 20 | stdin=chunk |
| 21 | ) |
| 22 | |
| 23 | upload_large_file(sandbox, "large_model.bin", "/app/model.bin") |
| 24 | |
Downloading Files from Sandboxes
Single File Download
| 1 | from hopx import Sandbox |
| 2 | |
| 3 | sandbox = Sandbox.create(template="code-interpreter") |
| 4 | |
| 5 | # Generate a file in sandbox |
| 6 | sandbox.commands.run(""" |
| 7 | python -c " |
| 8 | import matplotlib.pyplot as plt |
| 9 | import numpy as np |
| 10 | |
| 11 | x = np.linspace(0, 10, 100) |
| 12 | plt.plot(x, np.sin(x)) |
| 13 | plt.savefig('/app/plot.png') |
| 14 | " |
| 15 | """) |
| 16 | |
| 17 | # Download the generated file |
| 18 | image_data = sandbox.files.read("/app/plot.png") |
| 19 | with open("downloaded_plot.png", "wb") as f: |
| 20 | f.write(image_data) |
| 21 | |
Checking File Metadata Before Download
| 1 | from hopx import Sandbox |
| 2 | import json |
| 3 | |
| 4 | sandbox = Sandbox.create(template="code-interpreter") |
| 5 | |
| 6 | # Get file info before downloading |
| 7 | info = sandbox.files.stat("/app/output.csv") |
| 8 | print(f"Size: {info.size} bytes") |
| 9 | print(f"Modified: {info.modified_at}") |
| 10 | |
| 11 | # Download only if needed |
| 12 | if info.size > 0: |
| 13 | content = sandbox.files.read("/app/output.csv") |
| 14 | with open("output.csv", "wb") as f: |
| 15 | f.write(content) |
| 16 | |
Download Directory as Archive
| 1 | from hopx import Sandbox |
| 2 | import io |
| 3 | import tarfile |
| 4 | |
| 5 | sandbox = Sandbox.create(template="code-interpreter") |
| 6 | |
| 7 | # Create archive in sandbox |
| 8 | sandbox.commands.run("tar czf /tmp/output.tar.gz -C /app output_dir/") |
| 9 | |
| 10 | # Download archive |
| 11 | archive_data = sandbox.files.read("/tmp/output.tar.gz") |
| 12 | |
| 13 | # Extract locally |
| 14 | with tarfile.open(fileobj=io.BytesIO(archive_data), mode='r:gz') as tar: |
| 15 | tar.extractall("./downloaded_output") |
| 16 | |
Working with Data Files
CSV Processing
| 1 | from hopx import Sandbox |
| 2 | import json |
| 3 | |
| 4 | sandbox = Sandbox.create(template="code-interpreter") |
| 5 | |
| 6 | # Upload CSV |
| 7 | csv_data = """name,age,city |
| 8 | Alice,30,New York |
| 9 | Bob,25,San Francisco |
| 10 | Carol,35,Chicago""" |
| 11 | sandbox.files.write("/app/people.csv", csv_data) |
| 12 | |
| 13 | # Process with pandas |
| 14 | result = sandbox.commands.run(""" |
| 15 | python -c " |
| 16 | import pandas as pd |
| 17 | import json |
| 18 | |
| 19 | df = pd.read_csv('/app/people.csv') |
| 20 | |
| 21 | # Analysis |
| 22 | stats = { |
| 23 | 'count': len(df), |
| 24 | 'avg_age': df['age'].mean(), |
| 25 | 'cities': df['city'].unique().tolist() |
| 26 | } |
| 27 | |
| 28 | print(json.dumps(stats)) |
| 29 | " |
| 30 | """) |
| 31 | |
| 32 | stats = json.loads(result.stdout) |
| 33 | print(f"Average age: {stats['avg_age']}") |
| 34 | |
JSON Processing
| 1 | from hopx import Sandbox |
| 2 | import json |
| 3 | |
| 4 | sandbox = Sandbox.create(template="code-interpreter") |
| 5 | |
| 6 | # Upload complex JSON |
| 7 | data = { |
| 8 | "users": [ |
| 9 | {"id": 1, "name": "Alice", "purchases": [100, 200, 150]}, |
| 10 | {"id": 2, "name": "Bob", "purchases": [50, 75, 125]} |
| 11 | ] |
| 12 | } |
| 13 | sandbox.files.write("/app/data.json", json.dumps(data)) |
| 14 | |
| 15 | # Process JSON |
| 16 | result = sandbox.commands.run(""" |
| 17 | python -c " |
| 18 | import json |
| 19 | |
| 20 | with open('/app/data.json') as f: |
| 21 | data = json.load(f) |
| 22 | |
| 23 | # Calculate total purchases per user |
| 24 | for user in data['users']: |
| 25 | total = sum(user['purchases']) |
| 26 | print(f\"{user['name']}: \\${total}\") |
| 27 | " |
| 28 | """) |
| 29 | print(result.stdout) |
| 30 | |
Excel Files
| 1 | from hopx import Sandbox |
| 2 | |
| 3 | sandbox = Sandbox.create(template="code-interpreter") |
| 4 | |
| 5 | # Upload Excel file |
| 6 | with open("report.xlsx", "rb") as f: |
| 7 | sandbox.files.write("/app/report.xlsx", f.read()) |
| 8 | |
| 9 | # Process with openpyxl/pandas |
| 10 | result = sandbox.commands.run(""" |
| 11 | python -c " |
| 12 | import pandas as pd |
| 13 | |
| 14 | # Read all sheets |
| 15 | xlsx = pd.ExcelFile('/app/report.xlsx') |
| 16 | for sheet in xlsx.sheet_names: |
| 17 | df = pd.read_excel(xlsx, sheet_name=sheet) |
| 18 | print(f'Sheet: {sheet}, Rows: {len(df)}') |
| 19 | " |
| 20 | """) |
| 21 | print(result.stdout) |
| 22 | |
Image Processing
| 1 | from hopx import Sandbox |
| 2 | |
| 3 | sandbox = Sandbox.create(template="code-interpreter") |
| 4 | |
| 5 | # Upload image |
| 6 | with open("photo.jpg", "rb") as f: |
| 7 | sandbox.files.write("/app/photo.jpg", f.read()) |
| 8 | |
| 9 | # Process with Pillow |
| 10 | sandbox.commands.run(""" |
| 11 | python -c " |
| 12 | from PIL import Image |
| 13 | |
| 14 | img = Image.open('/app/photo.jpg') |
| 15 | print(f'Size: {img.size}') |
| 16 | print(f'Format: {img.format}') |
| 17 | |
| 18 | # Resize |
| 19 | resized = img.resize((800, 600)) |
| 20 | resized.save('/app/resized.jpg') |
| 21 | |
| 22 | # Thumbnail |
| 23 | img.thumbnail((150, 150)) |
| 24 | img.save('/app/thumbnail.jpg') |
| 25 | " |
| 26 | """) |
| 27 | |
| 28 | # Download processed images |
| 29 | resized = sandbox.files.read("/app/resized.jpg") |
| 30 | thumbnail = sandbox.files.read("/app/thumbnail.jpg") |
| 31 | |
Security Best Practices
1. Validate File Types
Always validate files before processing:
| 1 | import magic # python-magic library |
| 2 | |
| 3 | def validate_upload(file_data, allowed_types): |
| 4 | """Validate file type using magic numbers""" |
| 5 | mime = magic.from_buffer(file_data, mime=True) |
| 6 | |
| 7 | if mime not in allowed_types: |
| 8 | raise ValueError(f"File type {mime} not allowed") |
| 9 | |
| 10 | return mime |
| 11 | |
| 12 | # Usage |
| 13 | ALLOWED_TYPES = ['image/jpeg', 'image/png', 'text/csv', 'application/pdf'] |
| 14 | |
| 15 | with open("upload.file", "rb") as f: |
| 16 | data = f.read() |
| 17 | file_type = validate_upload(data, ALLOWED_TYPES) |
| 18 | sandbox.files.write(f"/app/upload.{file_type.split('/')[1]}", data) |
| 19 | |
2. Limit File Sizes
Prevent resource exhaustion:
| 1 | MAX_FILE_SIZE = 100 * 1024 * 1024 # 100MB |
| 2 | |
| 3 | def safe_upload(sandbox, file_path, sandbox_path): |
| 4 | """Upload with size limit""" |
| 5 | import os |
| 6 | |
| 7 | size = os.path.getsize(file_path) |
| 8 | if size > MAX_FILE_SIZE: |
| 9 | raise ValueError(f"File too large: {size} bytes (max: {MAX_FILE_SIZE})") |
| 10 | |
| 11 | with open(file_path, "rb") as f: |
| 12 | sandbox.files.write(sandbox_path, f.read()) |
| 13 | |
3. Sanitize Filenames
Prevent path traversal attacks:
| 1 | import os |
| 2 | import re |
| 3 | |
| 4 | def sanitize_filename(filename): |
| 5 | """Remove dangerous characters from filename""" |
| 6 | # Remove path components |
| 7 | filename = os.path.basename(filename) |
| 8 | |
| 9 | # Remove dangerous characters |
| 10 | filename = re.sub(r'[^\w\-\.]', '_', filename) |
| 11 | |
| 12 | # Prevent hidden files |
| 13 | filename = filename.lstrip('.') |
| 14 | |
| 15 | # Limit length |
| 16 | if len(filename) > 255: |
| 17 | name, ext = os.path.splitext(filename) |
| 18 | filename = name[:255-len(ext)] + ext |
| 19 | |
| 20 | return filename |
| 21 | |
| 22 | # Usage |
| 23 | user_filename = "../../../etc/passwd" |
| 24 | safe_name = sanitize_filename(user_filename) # "passwd" (os.path.basename strips the path) |
| 25 | sandbox.files.write(f"/app/uploads/{safe_name}", data) |
| 26 | |
4. Use Temporary Directories
Isolate file operations:
| 1 | from hopx import Sandbox |
| 2 | import uuid |
| 3 | |
| 4 | sandbox = Sandbox.create(template="code-interpreter") |
| 5 | |
| 6 | # Create unique temp directory per operation |
| 7 | session_id = str(uuid.uuid4())[:8] |
| 8 | temp_dir = f"/tmp/session_{session_id}" |
| 9 | |
| 10 | sandbox.commands.run(f"mkdir -p {temp_dir}") |
| 11 | |
| 12 | # All operations in isolated directory |
| 13 | sandbox.files.write(f"{temp_dir}/input.txt", data) |
| 14 | sandbox.commands.run(f"cd {temp_dir} && process_file input.txt output.txt") |
| 15 | result = sandbox.files.read(f"{temp_dir}/output.txt") |
| 16 | |
| 17 | # Clean up |
| 18 | sandbox.commands.run(f"rm -rf {temp_dir}") |
| 19 | |
5. Quota Management
Monitor and limit disk usage:
| 1 | from hopx import Sandbox |
| 2 | |
| 3 | sandbox = Sandbox.create(template="code-interpreter") |
| 4 | |
| 5 | def check_disk_usage(sandbox, max_usage_mb=500): |
| 6 | """Check disk usage in sandbox""" |
| 7 | result = sandbox.commands.run("df -m /app | tail -1 | awk '{print $3}'") |
| 8 | used_mb = int(result.stdout.strip()) |
| 9 | |
| 10 | if used_mb > max_usage_mb: |
| 11 | raise Exception(f"Disk usage ({used_mb}MB) exceeds limit ({max_usage_mb}MB)") |
| 12 | |
| 13 | return used_mb |
| 14 | |
| 15 | # Check before operations |
| 16 | usage = check_disk_usage(sandbox) |
| 17 | print(f"Current usage: {usage}MB") |
| 18 | |
Common Patterns
Process and Return Results
| 1 | from hopx import Sandbox |
| 2 | import json |
| 3 | |
| 4 | def process_data_file(input_file: bytes, processing_code: str) -> dict: |
| 5 | """Process a data file and return results""" |
| 6 | sandbox = Sandbox.create(template="code-interpreter") |
| 7 | |
| 8 | try: |
| 9 | # Upload input |
| 10 | sandbox.files.write("/app/input.data", input_file) |
| 11 | |
| 12 | # Write and run processing code |
| 13 | sandbox.files.write("/app/process.py", processing_code) |
| 14 | result = sandbox.commands.run("cd /app && python process.py") |
| 15 | |
| 16 | if result.exit_code != 0: |
| 17 | return {"error": result.stderr} |
| 18 | |
| 19 | # Return output |
| 20 | return {"output": result.stdout, "success": True} |
| 21 | |
| 22 | finally: |
| 23 | sandbox.kill() |
| 24 | |
Batch File Processing
| 1 | from hopx import Sandbox |
| 2 | import concurrent.futures |
| 3 | |
| 4 | def process_files_batch(files: list[tuple[str, bytes]]) -> list: |
| 5 | """Process multiple files in parallel sandboxes""" |
| 6 | |
| 7 | def process_single(name_and_data): |
| 8 | name, data = name_and_data |
| 9 | sandbox = Sandbox.create(template="code-interpreter") |
| 10 | |
| 11 | try: |
| 12 | sandbox.files.write(f"/app/{name}", data) |
| 13 | result = sandbox.commands.run(f"python /app/analyze.py /app/{name}") |
| 14 | return {"name": name, "result": result.stdout} |
| 15 | finally: |
| 16 | sandbox.kill() |
| 17 | |
| 18 | with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor: |
| 19 | results = list(executor.map(process_single, files)) |
| 20 | |
| 21 | return results |
| 22 | |
Sequential Transformation Pipeline
| 1 | from hopx import Sandbox |
| 2 | |
| 3 | def transform_pipeline(input_file: bytes, transformations: list[str]) -> bytes: |
| 4 | """Apply a series of transformations to a file""" |
| 5 | sandbox = Sandbox.create(template="code-interpreter") |
| 6 | |
| 7 | try: |
| 8 | # Start with input |
| 9 | sandbox.files.write("/app/current.data", input_file) |
| 10 | |
| 11 | # Apply each transformation |
| 12 | for i, transform in enumerate(transformations): |
| 13 | sandbox.files.write(f"/app/transform_{i}.py", transform) |
| 14 | sandbox.commands.run( |
| 15 | f"python /app/transform_{i}.py /app/current.data /app/next.data" |
| 16 | ) |
| 17 | sandbox.commands.run("mv /app/next.data /app/current.data") |
| 18 | |
| 19 | # Return final result |
| 20 | return sandbox.files.read("/app/current.data") |
| 21 | |
| 22 | finally: |
| 23 | sandbox.kill() |
| 24 | |
Conclusion
Secure file operations are essential for AI agents that work with user data. HopX sandboxes provide:
- Isolation - Files never touch your host system
- Security - Each sandbox has its own filesystem
- Flexibility - Full Linux filesystem capabilities
- Simplicity - Clean API for common operations
By following the patterns and best practices in this guide, you can build AI agents that safely process any type of file.
Resources