Learn how to process files (CSV, JSON, images, etc.) using Hopx Sandboxes for safe, isolated file operations.

Basic File Processing

Process a CSV file:
from hopx_ai import Sandbox
import json

def process_csv_file(csv_content: str):
    """Process a CSV file"""
    with Sandbox.create(template="code-interpreter") as sandbox:
        # Upload CSV file
        sandbox.files.write("/workspace/data.csv", csv_content)
        
        result = sandbox.run_code("""
import pandas as pd
import json

# Read CSV
df = pd.read_csv('/workspace/data.csv')

# Process data
df['processed'] = df['value'] * 2  # Example transformation

# Save processed data
df.to_csv('/workspace/processed_data.csv', index=False)

# Generate summary
summary = {
    'total_rows': len(df),
    'columns': list(df.columns),
    'processed_rows': len(df[df['processed'] > 0])
}

with open('/workspace/summary.json', 'w') as f:
    json.dump(summary, f, indent=2)

print(f"Processed {len(df)} rows")
        """)
        
        # Download processed file
        processed_data = sandbox.files.read("/workspace/processed_data.csv")
        summary_json = sandbox.files.read("/workspace/summary.json")
        return {
            "processed_data": processed_data,
            "summary": json.loads(summary_json)
        }

# Example
csv_data = """id,value
1,10
2,20
3,30
"""

results = process_csv_file(csv_data)
print(f"Summary: {results['summary']}")
print(f"Processed data:\\n{results['processed_data']}")

Batch File Processing

Process multiple files:
from hopx_ai import Sandbox
import json

def process_multiple_files(files: dict):
    """Process multiple files"""
    with Sandbox.create(template="code-interpreter") as sandbox:
        # Upload all files
        for filename, content in files.items():
            sandbox.files.write(f"/workspace/{filename}", content)
        
        result = sandbox.run_code("""
import pandas as pd
import json
from pathlib import Path

# Process all CSV files
results = {}
workspace = Path('/workspace')

# Find and process every CSV file in the workspace
for csv_file in sorted(workspace.glob('*.csv')):
    df = pd.read_csv(csv_file)
    
    # Process each file
    processed = {
        'filename': csv_file.name,
        'rows': len(df),
        'columns': list(df.columns),
        'summary': df.describe().to_dict() if len(df.select_dtypes(include=['number']).columns) > 0 else None
    }
    
    # Save processed version
    output_file = workspace / f"processed_{csv_file.name}"
    df.to_csv(output_file, index=False)
    
    results[csv_file.name] = processed

# Save results
with open('/workspace/processing_results.json', 'w') as f:
    json.dump(results, f, indent=2)

print(f"Processed {len(results)} files")
        """)
        
        # Download results
        results_json = sandbox.files.read("/workspace/processing_results.json")
        return json.loads(results_json)

# Example
files = {
    "data1.csv": "id,value\n1,10\n2,20",
    "data2.csv": "id,value\n3,30\n4,40"
}

results = process_multiple_files(files)
for filename, data in results.items():
    print(f"{filename}: {data['rows']} rows")

Image Processing

Process image files:
from hopx_ai import Sandbox
import base64
import json

def process_image(image_data: bytes):
    """Process an image file"""
    with Sandbox.create(template="code-interpreter") as sandbox:
        # Upload image
        sandbox.files.write_bytes("/workspace/image.jpg", image_data)
        
        # Install Pillow inside the sandbox so the PIL import below succeeds
        sandbox.commands.run("pip install Pillow --quiet")
        
        result = sandbox.run_code("""
from PIL import Image
import json

# Open image
img = Image.open('/workspace/image.jpg')

# Get image info
info = {
    'format': img.format,
    'mode': img.mode,
    'size': img.size,
    'width': img.width,
    'height': img.height
}

# Resize image
resized = img.resize((800, 600))
resized.save('/workspace/resized_image.jpg', 'JPEG')

# Convert to grayscale
grayscale = img.convert('L')
grayscale.save('/workspace/grayscale_image.jpg', 'JPEG')

# Save info
with open('/workspace/image_info.json', 'w') as f:
    json.dump(info, f, indent=2)

print(f"Processed image: {info['width']}x{info['height']}")
        """)
        
        # Download processed images
        resized_image = sandbox.files.read_bytes("/workspace/resized_image.jpg")
        grayscale_image = sandbox.files.read_bytes("/workspace/grayscale_image.jpg")
        info_json = sandbox.files.read("/workspace/image_info.json")
        
        return {
            "resized_image": base64.b64encode(resized_image).decode('utf-8'),
            "grayscale_image": base64.b64encode(grayscale_image).decode('utf-8'),
            "info": json.loads(info_json)
        }

# Example: Download and process an image
import requests

# Download an image from the internet
response = requests.get("https://picsum.photos/200/200")
image_bytes = response.content

# Process the image
results = process_image(image_bytes)
print(f"Image info: {results['info']}")

Next Steps