Skip to main content
Practical examples of data analysis workflows using pandas, NumPy, Matplotlib, and seaborn in Hopx Sandboxes.

Basic Data Analysis

Load and analyze CSV data:
  • Python
  • JavaScript/TypeScript
from hopx_ai import Sandbox

def analyze_csv(csv_data: str):
    """Print a basic pandas summary of CSV text from inside a Hopx sandbox.

    The CSV text is written to ``/workspace/data.csv`` in a sandbox created
    from the ``code-interpreter`` template. A short script then loads it
    with pandas and prints the DataFrame info, summary statistics, and the
    first five rows.

    Args:
        csv_data: Full contents of the CSV file, header row included.

    Returns:
        The standard output captured from the sandboxed script
        (``result.stdout``).
    """
    with Sandbox.create(template="code-interpreter") as sandbox:
        # Upload data
        sandbox.files.write("/workspace/data.csv", csv_data)

        # Analyze
        # The script is a regular (non-raw) string, so "\\n" below reaches
        # the sandbox as the two-character escape "\n" inside its own
        # string literals.
        result = sandbox.run_code("""
import pandas as pd
import numpy as np

# Load data
df = pd.read_csv('/workspace/data.csv')

# Basic statistics
print("Dataset Info:")
# df.info() writes its report to stdout itself and returns None, so it is
# called directly; wrapping it in print() would add a stray "None" line.
df.info()
print("\\nSummary Statistics:")
print(df.describe())
print("\\nFirst 5 rows:")
print(df.head())
        """)

        return result.stdout

# Example: a small employee table, assembled row by row into CSV text
csv_data = "\n".join([
    "name,age,salary,department",
    "Alice,25,75000,Engineering",
    "Bob,30,85000,Marketing",
    "Charlie,35,95000,Engineering",
    "Diana,28,80000,Sales",
    "Eve,32,90000,Engineering",
]) + "\n"  # trailing newline terminates the last record

# Run the analysis in a sandbox and show what the script printed
output = analyze_csv(csv_data)
print(output)

Data Visualization

Create charts and plots:
  • Python
  • JavaScript/TypeScript
from hopx_ai import Sandbox
import base64

def create_visualizations(csv_data: str):
    """Render a 2x2 chart dashboard from CSV text inside a Hopx sandbox.

    The CSV text is written to ``/workspace/data.csv`` in a sandbox created
    from the ``code-interpreter`` template. A Matplotlib/seaborn script then
    draws four panels (age histogram, mean salary per department, age vs.
    salary scatter, department pie chart) and saves them to
    ``/workspace/visualizations.png``. The script reads the ``age``,
    ``salary`` and ``department`` columns, so the CSV must provide them.

    Args:
        csv_data: Full contents of the CSV file, header row included.

    Returns:
        The saved PNG image, base64-encoded as a UTF-8 string.

    Raises:
        RuntimeError: If the sandboxed plotting script reports failure.
    """
    with Sandbox.create(template="code-interpreter") as sandbox:
        sandbox.files.write("/workspace/data.csv", csv_data)

        result = sandbox.run_code("""
import pandas as pd
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import seaborn as sns

# Load data
df = pd.read_csv('/workspace/data.csv')

# Set style
sns.set_style('whitegrid')

# Create figure
fig, axes = plt.subplots(2, 2, figsize=(14, 10))
fig.suptitle('Data Analysis Dashboard', fontsize=16)

# Age distribution
axes[0, 0].hist(df['age'], bins=10, edgecolor='black', color='skyblue')
axes[0, 0].set_title('Age Distribution')
axes[0, 0].set_xlabel('Age')
axes[0, 0].set_ylabel('Count')

# Salary by department
dept_salary = df.groupby('department')['salary'].mean()
axes[0, 1].bar(dept_salary.index, dept_salary.values, color='lightgreen')
axes[0, 1].set_title('Average Salary by Department')
axes[0, 1].set_xlabel('Department')
axes[0, 1].set_ylabel('Salary')
axes[0, 1].tick_params(axis='x', rotation=45)

# Age vs Salary scatter
axes[1, 0].scatter(df['age'], df['salary'], s=100, alpha=0.6, color='coral')
axes[1, 0].set_title('Age vs Salary')
axes[1, 0].set_xlabel('Age')
axes[1, 0].set_ylabel('Salary')
axes[1, 0].grid(True, alpha=0.3)

# Department distribution
dept_counts = df['department'].value_counts()
axes[1, 1].pie(dept_counts.values, labels=dept_counts.index, autopct='%1.1f%%')
axes[1, 1].set_title('Employees by Department')

plt.tight_layout()
plt.savefig('/workspace/visualizations.png', dpi=150, bbox_inches='tight')
print("✅ Visualizations saved!")
        """)

        # Surface script failures here, with the script's own error output,
        # instead of letting them show up later as a missing-file error
        # when the PNG is downloaded.
        if not result.success:
            raise RuntimeError(
                f"Visualization script failed:\n{result.stderr}"
            )

        # Download image
        image_data = sandbox.files.read_bytes("/workspace/visualizations.png")
        return base64.b64encode(image_data).decode('utf-8')

# Sample employee records: one CSV row per literal, header first
csv_data = (
    "name,age,salary,department\n"
    "Alice,25,75000,Engineering\n"
    "Bob,30,85000,Marketing\n"
    "Charlie,35,95000,Engineering\n"
    "Diana,28,80000,Sales\n"
    "Eve,32,90000,Engineering\n"
)

# Render the dashboard in a sandbox; the PNG comes back as base64 text
image_base64 = create_visualizations(csv_data)
# Use image_base64 in your application

Next Steps

For more advanced data analysis workflows, including statistical analysis, time series analysis, and complete pipeline patterns, see the Data Analysis Pipeline Tutorial.