Documentation

Pandas Optimization

Optimizing pandas DataFrame operations with Epochly for maximum performance.

Overview

Epochly optimizes pandas operations by parallelizing CPU-bound operations and removing Python overhead.

| Operation | Example | Recommended Level | Speedup |
|---|---|---|---|
| GroupBy | groupby().agg() | Level 3 (Multicore) | 5-15x |
| Apply | apply(func), applymap() | Level 3 (Multicore) | 5-20x |
| Merge | merge(), join() (inner only) | Level 2 (JIT) | 2-5x |
| Sort | sort_values() | Level 2 (JIT) | 2-4x |
| Rolling | rolling().mean() | Level 2 (JIT) | 2-5x |

GroupBy Optimization

Basic GroupBy

import epochly
import pandas as pd
import numpy as np
@epochly.optimize(level=3)
def optimized_groupby(df):
    """Aggregate ``value`` and ``quantity`` for each ``category``.

    The function is wrapped with ``epochly.optimize(level=3)``; the overview
    on this page lists Level 3 (Multicore) as the recommended level for
    GroupBy workloads.

    Args:
        df: DataFrame containing ``category``, ``value`` and ``quantity``
            columns.

    Returns:
        The result of ``groupby('category').agg(...)`` with sum/mean/std of
        ``value`` and sum/max/min of ``quantity``.
    """
    # One entry per source column; each list names the reductions to compute.
    aggregations = {
        'value': ['sum', 'mean', 'std'],
        'quantity': ['sum', 'max', 'min'],
    }
    by_category = df.groupby('category')
    return by_category.agg(aggregations)
# Synthetic input: 10 million rows with one categorical key and two numeric
# columns. The data is unseeded, so it differs on every run.
df = pd.DataFrame(
    {
        'category': np.random.choice(['A', 'B', 'C', 'D'], size=10_000_000),
        'value': np.random.rand(10_000_000),
        'quantity': np.random.randint(1, 100, size=10_000_000),
    }
)

# Run the decorated aggregation on the sample frame.
result = optimized_groupby(df)

Multi-Key GroupBy

import epochly
import pandas as pd
import numpy as np
@epochly.optimize(level=3)
def multi_key_groupby(df):
    """Aggregate sales figures for each (``category``, ``region``) pair.

    Args:
        df: DataFrame containing ``category``, ``region``, ``sales``,
            ``profit`` and ``units`` columns.

    Returns:
        The result of grouping on both keys and aggregating: total ``sales``,
        mean ``profit`` and total ``units`` per group.
    """
    group_keys = ['category', 'region']
    # Exactly one reduction per column, so each value is a single name.
    reductions = {
        'sales': 'sum',
        'profit': 'mean',
        'units': 'sum',
    }
    grouped = df.groupby(group_keys)
    return grouped.agg(reductions)
# Synthetic sales data: 5 million rows keyed by product category and region.
# The data is unseeded, so it differs on every run.
df = pd.DataFrame(
    {
        'category': np.random.choice(
            ['Electronics', 'Clothing', 'Food'], size=5_000_000
        ),
        'region': np.random.choice(
            ['North', 'South', 'East', 'West'], size=5_000_000
        ),
        'sales': np.random.rand(5_000_000) * 1000,
        'profit': np.random.rand(5_000_000) * 100,
        'units': np.random.randint(1, 50, size=5_000_000),
    }
)

# Run the decorated two-key aggregation on the sample frame.
result = multi_key_groupby(df)

Custom Aggregation

import epochly
import pandas as pd
import numpy as np
@epochly.optimize(level=3)
def custom_aggregation(df):
    """Compute the weighted average of ``value`` for each ``category``.

    Args:
        df: DataFrame containing ``category``, ``value`` and ``weight``
            columns.

    Returns:
        The result of the per-group ``apply``: one weighted average per
        distinct ``category``.
    """

    def weighted_avg(group):
        # sum(value * weight) / sum(weight) within a single category.
        # NOTE(review): there is no guard for a group whose weights sum to
        # zero; confirm whether callers need one.
        return (group['value'] * group['weight']).sum() / group['weight'].sum()

    # Select only the columns the helper reads, so the grouping column is not
    # passed into apply(). pandas 2.2 deprecated applying over the grouping
    # columns and warns on the unselected form; explicit selection gives the
    # same result on older and newer pandas alike.
    return df.groupby('category')[['value', 'weight']].apply(weighted_avg)
# Synthetic input: 1 million rows with a categorical key, a value and a
# weight. The data is unseeded, so it differs on every run.
df = pd.DataFrame(
    {
        'category': np.random.choice(['A', 'B', 'C'], size=1_000_000),
        'value': np.random.rand(1_000_000),
        'weight': np.random.rand(1_000_000),
    }
)

# Run the decorated weighted-average aggregation on the sample frame.
result = custom_aggregation(df)

Apply Operations

Row-wise Apply

import epochly
import pandas as pd
import numpy as np
@epochly.optimize(level=3)
def optimized_apply(df):
    """Add a ``computed`` column derived row by row from three inputs.

    For every row the new column holds
    ``value ** 2 + quantity * price``.

    Args:
        df: DataFrame containing ``value``, ``quantity`` and ``price``
            columns. It is modified in place: the ``computed`` column is
            written onto this same object.

    Returns:
        The same ``df`` object that was passed in, now carrying ``computed``.
    """

    def _row_result(record):
        squared_value = record['value'] ** 2
        line_total = record['quantity'] * record['price']
        return squared_value + line_total

    # axis=1 hands each row (not each column) to the helper.
    per_row = df.apply(_row_result, axis=1)
    df['computed'] = per_row
    return df
# Synthetic input: 1 million rows with the three columns the row-wise
# calculation reads. The data is unseeded, so it differs on every run.
df = pd.DataFrame(
    {
        'value': np.random.rand(1_000_000),
        'quantity': np.random.randint(1, 100, size=1_000_000),
        'price': np.random.rand(1_000_000) * 100,
    }
)

# Run the decorated row-wise apply; the returned frame carries 'computed'.
result = optimized_apply(df)