Tutorial 3: Basic Visualization¶
Overview¶
Learn to create essential biomechanical visualizations: phase averages with standard deviation bands, spaghetti plots showing all cycles, and publication-ready figures.
Learning Objectives¶
- Compute and plot phase averages
- Add standard deviation shading
- Create spaghetti plots
- Customize plots for publication
- Compare multiple conditions
Setup and Imports¶
import numpy as np
import matplotlib.pyplot as plt
from user_libs.python.locomotion_data import LocomotionData
# Apply the seaborn-derived dark-grid style sheet to every figure created below
plt.style.use('seaborn-v0_8-darkgrid')

# LocomotionData variant: open the dataset file through the library wrapper,
# then keep only subject SUB01's level-walking data
data = LocomotionData('converted_datasets/umich_2021_phase.parquet')
level_walking = data.filter(
    task='level_walking',
    subject='SUB01',
)
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Apply the seaborn-derived dark-grid style sheet to every figure created below
plt.style.use('seaborn-v0_8-darkgrid')

# Plain-pandas variant: read the parquet file into a DataFrame, then keep only
# the rows recorded for subject SUB01 during level walking
data = pd.read_parquet('converted_datasets/umich_2021_phase.parquet')
level_walking = data.loc[
    (data['subject'] == 'SUB01') & (data['task'] == 'level_walking')
]
Computing Phase Averages¶
Basic Phase Average¶
# LocomotionData variant: fetch the mean patterns through the library method
mean_patterns = level_walking.get_mean_patterns('SUB01', 'level_walking')

# Pick the mean and standard deviation entries for ipsilateral knee flexion
knee_mean = mean_patterns['knee_flexion_angle_ipsi_rad']['mean']
knee_std = mean_patterns['knee_flexion_angle_ipsi_rad']['std']

# Degrees are easier to read on a plot than radians
knee_mean_deg, knee_std_deg = np.degrees(knee_mean), np.degrees(knee_std)
def compute_phase_average(data, variable):
    """Return the per-phase mean and standard deviation of one variable.

    Rows are bucketed by their ``phase_percent`` value, so every bucket pools
    all rows recorded at that phase.

    Args:
        data: DataFrame holding a ``phase_percent`` column and the column
            named by ``variable``.
        variable: Name of the column to summarise.

    Returns:
        Tuple ``(mean_curve, std_curve)``; both are Series indexed by
        ``phase_percent``.
    """
    per_phase = data.groupby('phase_percent')[variable]
    return per_phase.mean(), per_phase.std()
# Plain-pandas variant: per-phase mean and SD of ipsilateral knee flexion
knee_mean, knee_std = compute_phase_average(
    level_walking,
    'knee_flexion_angle_ipsi_rad',
)

# Degrees are easier to read on a plot than radians
knee_mean_deg, knee_std_deg = np.degrees(knee_mean), np.degrees(knee_std)
Using Built-in Methods¶
# LocomotionData variant: request the mean patterns for one subject and task
mean_patterns = level_walking.get_mean_patterns(
    'SUB01',
    'level_walking',
)

# Each variable has its own entry; take the mean curve for knee flexion
knee_mean = mean_patterns['knee_flexion_angle_ipsi_rad']['mean']

# get_statistics is the library call for retrieving all statistics at once
stats = level_walking.get_statistics(
    'SUB01',
    'level_walking',
)
# Manual computation with pandas
# Columns that identify a row rather than carry a measurement
id_columns = ['subject', 'task', 'cycle_id', 'phase_percent']

# Build the grouping once and reuse it for every column
by_phase = level_walking.groupby('phase_percent')

# NOTE(review): assumes every remaining column is numeric - confirm for
# datasets that carry extra metadata columns
mean_patterns = {}
for col in level_walking.columns:
    if col in id_columns:
        continue
    mean_patterns[col] = {
        'mean': by_phase[col].mean(),
        'std': by_phase[col].std(),
    }

# Access specific variable
knee_mean = mean_patterns['knee_flexion_angle_ipsi_rad']['mean']
Creating Basic Plots¶
Phase Average with Standard Deviation¶
def plot_phase_average_with_std(phase_percent, mean, std, title='', ylabel='', xlabel='Gait Cycle (%)'):
    """Draw a mean curve with a shaded band one standard deviation wide.

    Args:
        phase_percent: X positions of the samples.
        mean: Mean value at each position.
        std: Standard deviation at each position.
        title: Axes title.
        ylabel: Y-axis label.
        xlabel: X-axis label.

    Returns:
        Tuple ``(fig, ax)`` for the new figure and its axes.
    """
    fig, ax = plt.subplots(figsize=(10, 6))

    # Edges of the +/-1 SD envelope
    lower_edge = mean - std
    upper_edge = mean + std

    # Mean first, band second, so the legend lists them in that order
    ax.plot(phase_percent, mean, 'b-', linewidth=2, label='Mean')
    ax.fill_between(
        phase_percent,
        lower_edge,
        upper_edge,
        alpha=0.3,
        color='blue',
        label='±1 SD',
    )

    # Labels, title, legend and grid
    ax.set_xlabel(xlabel, fontsize=12)
    ax.set_ylabel(ylabel, fontsize=12)
    ax.set_title(title, fontsize=14, fontweight='bold')
    ax.legend(loc='best')
    ax.grid(True, alpha=0.3)

    # The x axis always spans 0-100
    ax.set_xlim(0, 100)

    return fig, ax
# Use the index of the averaged curve as the x positions.
# .index requires knee_mean_deg to be a pandas Series, as produced by the
# groupby-based computation.
phase_percent = knee_mean_deg.index

# Draw the knee-flexion mean with its SD band and display it
fig, ax = plot_phase_average_with_std(
    phase_percent, knee_mean_deg, knee_std_deg,
    title='Knee Flexion During Level Walking',
    ylabel='Knee Flexion (degrees)',
)
plt.show()
Multiple Standard Deviation Bands¶
def plot_with_confidence_bands(phase_percent, mean, std, title='',
                               ylabel='Angle (degrees)', xlabel='Gait Cycle (%)'):
    """Plot a mean curve with nested +/-1 SD and +/-2 SD bands.

    The bands show spread around the mean (standard deviations), not
    confidence intervals of the mean. The 68% / 95% figures in the legend
    are the coverage of those bands for normally distributed data.

    Args:
        phase_percent: X positions of the samples.
        mean: Mean value at each position.
        std: Standard deviation at each position.
        title: Axes title.
        ylabel: Y-axis label; defaults to the previously hard-coded text.
        xlabel: X-axis label; defaults to the previously hard-coded text.

    Returns:
        Tuple ``(fig, ax)`` for the new figure and its axes.
    """
    fig, ax = plt.subplots(figsize=(10, 6))

    # Plot mean
    ax.plot(phase_percent, mean, 'b-', linewidth=2, label='Mean')

    # Draw the wide band first so the narrower, darker band sits on top of it
    ax.fill_between(phase_percent, mean - 2 * std, mean + 2 * std,
                    alpha=0.15, color='blue', label='±2 SD (95%)')
    ax.fill_between(phase_percent, mean - std, mean + std,
                    alpha=0.3, color='blue', label='±1 SD (68%)')

    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)
    ax.set_title(title)
    ax.legend()
    ax.grid(True, alpha=0.3)

    return fig, ax
Spaghetti Plots¶
Plot All Cycles¶
def create_spaghetti_plot(data, variable, title=''):
    """Overlay every individual cycle and draw the mean curve on top.

    Values in ``variable`` are converted from radians to degrees for display.

    Args:
        data: DataFrame with ``cycle_id``, ``phase_percent`` and ``variable``
            columns.
        variable: Name of the column to plot.
        title: Axes title.

    Returns:
        Tuple ``(fig, ax)`` for the new figure and its axes.
    """
    # NOTE(review): assumes each cycle_id value identifies exactly one cycle
    # within ``data`` - verify when data spans several subjects or tasks.
    fig, ax = plt.subplots(figsize=(10, 6))

    # Split the frame into cycles in a single pass instead of scanning the
    # whole frame once per cycle. sort=False keeps first-appearance order,
    # so the lines are drawn in the same order as before.
    for _, cycle_data in data.groupby('cycle_id', sort=False):
        ax.plot(
            cycle_data['phase_percent'].values,
            np.degrees(cycle_data[variable].values),  # Convert to degrees
            alpha=0.3, linewidth=0.5, color='gray',
        )

    # Add mean on top
    mean_curve = data.groupby('phase_percent')[variable].mean()
    ax.plot(mean_curve.index, np.degrees(mean_curve.values),
            'r-', linewidth=2, label='Mean')

    ax.set_xlabel('Gait Cycle (%)')
    ax.set_ylabel('Angle (degrees)')
    ax.set_title(title)
    ax.legend()
    ax.grid(True, alpha=0.3)
    ax.set_xlim(0, 100)

    return fig, ax
# Overlay every level-walking knee-flexion cycle, then display the figure
fig, ax = create_spaghetti_plot(
    level_walking, 'knee_flexion_angle_ipsi_rad',
    'All Knee Flexion Cycles - Level Walking',
)
plt.show()
Spaghetti with Highlighted Mean¶
def spaghetti_with_highlights(data, variable, highlight_outliers=True):
    """Spaghetti plot with the mean, a +/-1 SD band and outlier cycles in red.

    A cycle counts as an outlier when it lies more than two standard
    deviations from the mean cycle at any sample. Values in ``variable`` are
    converted from radians to degrees for display.

    Args:
        data: DataFrame with ``cycle_id`` and ``variable`` columns. Rows of
            each cycle are used in the order they appear.
        variable: Name of the column to plot.
        highlight_outliers: When False, outlier cycles are drawn in gray like
            every other cycle.

    Returns:
        Tuple ``(fig, ax)`` for the new figure and its axes.

    Raises:
        ValueError: If ``data`` holds no cycles, or the cycles do not all have
            the same number of samples.
    """
    # NOTE(review): assumes rows within a cycle are already ordered by phase
    # and that cycle_id identifies exactly one cycle - confirm against caller.
    cycles = [
        np.degrees(cycle_data[variable].values)
        for _, cycle_data in data.groupby('cycle_id', sort=False)
    ]
    if not cycles:
        raise ValueError('data contains no cycles to plot')

    # The cycles can only be stacked into a matrix when they share one length
    lengths = {len(cycle) for cycle in cycles}
    if len(lengths) != 1:
        raise ValueError(
            'all cycles must have the same number of samples; '
            f'found lengths {sorted(lengths)}'
        )

    all_cycles = np.vstack(cycles)
    # Samples per cycle come from the data rather than a fixed 150
    n_points = all_cycles.shape[1]
    sample_index = np.arange(n_points)

    # Calculate statistics for outlier detection
    mean_cycle = np.mean(all_cycles, axis=0)
    std_cycle = np.std(all_cycles, axis=0)
    outlier_mask = np.any(np.abs(all_cycles - mean_cycle) > 2 * std_cycle, axis=1)

    fig, ax = plt.subplots(figsize=(12, 6))

    # Plot individual cycles
    for cycle, is_outlier in zip(all_cycles, outlier_mask):
        if is_outlier and highlight_outliers:
            ax.plot(sample_index, cycle, 'r-', alpha=0.5, linewidth=0.5)
        else:
            ax.plot(sample_index, cycle, 'gray', alpha=0.3, linewidth=0.5)

    # Plot mean
    ax.plot(sample_index, mean_cycle, 'b-', linewidth=3, label='Mean')

    # Add SD bands
    ax.fill_between(sample_index,
                    mean_cycle - std_cycle,
                    mean_cycle + std_cycle,
                    alpha=0.2, color='blue')

    ax.set_xlabel('Gait Cycle (%)')
    ax.set_ylabel('Angle (degrees)')
    ax.set_title(f'{variable} - All Cycles with Outliers Highlighted')
    ax.legend()

    # The x axis is in sample indices; relabel it as percent of the cycle
    ax.set_xticks(np.linspace(0, n_points, 6))
    ax.set_xticklabels(['0', '20', '40', '60', '80', '100'])

    return fig, ax
Comparing Conditions¶
Side-by-Side Comparison¶
def compare_conditions(data, variable, conditions_dict):
    """Plot mean +/- SD of one variable for several conditions side by side.

    Values in ``variable`` are converted from radians to degrees for display.

    Args:
        data: DataFrame with ``phase_percent`` and ``variable`` columns.
        variable: Name of the column to plot.
        conditions_dict: Maps a panel title to a boolean mask selecting that
            condition's rows. A mask that is a pandas Series with a different
            index from ``data`` (for example one built on a larger frame) is
            aligned to ``data``; rows it has no entry for are not selected.

    Returns:
        The figure holding one panel per condition.
    """
    # squeeze=False always yields a 2-D axes array, so a single condition
    # needs no special case
    fig, axes = plt.subplots(1, len(conditions_dict), figsize=(15, 5),
                             squeeze=False)

    for ax, (condition_name, condition_filter) in zip(axes[0], conditions_dict.items()):
        # Align label-indexed masks explicitly rather than relying on pandas'
        # implicit reindexing, which emits a UserWarning
        if (hasattr(condition_filter, 'reindex')
                and not condition_filter.index.equals(data.index)):
            condition_filter = condition_filter.reindex(data.index, fill_value=False)

        # Filter data
        condition_data = data[condition_filter]

        # Compute mean and std from one grouping, then convert to degrees
        by_phase = condition_data.groupby('phase_percent')[variable]
        mean_deg = np.degrees(by_phase.mean())
        std_deg = np.degrees(by_phase.std())

        # Plot
        ax.plot(mean_deg.index, mean_deg.values, 'b-', linewidth=2)
        ax.fill_between(mean_deg.index,
                        mean_deg - std_deg,
                        mean_deg + std_deg,
                        alpha=0.3)

        ax.set_title(condition_name)
        ax.set_xlabel('Gait Cycle (%)')
        ax.set_ylabel('Angle (degrees)')
        ax.set_xlim(0, 100)
        ax.grid(True, alpha=0.3)

    plt.tight_layout()
    return fig
# Example: Compare different walking conditions for one subject.
# Select the subject first and build the masks from that same frame, so each
# mask shares its index with the data it is applied to (a mask built on the
# full frame makes pandas warn about reindexing the boolean key).
sub01_data = data[data['subject'] == 'SUB01']
conditions = {
    'Level Walking': sub01_data['task'] == 'level_walking',
    'Incline Walking': sub01_data['task'] == 'incline_walking',
    'Decline Walking': sub01_data['task'] == 'decline_walking'
}
fig = compare_conditions(sub01_data,
                         'knee_flexion_angle_ipsi_rad',
                         conditions)
plt.show()
Overlay Comparison¶
def overlay_conditions(data, variable, conditions_dict, colors=None):
    """Overlay mean +/- SD curves of several conditions on a single axes.

    Values in ``variable`` are converted from radians to degrees for display.

    Args:
        data: DataFrame with ``phase_percent`` and ``variable`` columns.
        variable: Name of the column to plot; also used, prettified, as the
            axes title.
        conditions_dict: Maps a legend label to a boolean mask selecting that
            condition's rows. A mask that is a pandas Series with a different
            index from ``data`` is aligned to ``data``; rows it has no entry
            for are not selected.
        colors: Optional sequence with one colour per condition. Defaults to
            evenly spaced samples of the ``Set1`` colormap.

    Returns:
        Tuple ``(fig, ax)`` for the new figure and its axes.
    """
    fig, ax = plt.subplots(figsize=(10, 6))

    if colors is None:
        colors = plt.cm.Set1(np.linspace(0, 1, len(conditions_dict)))

    for i, (condition_name, condition_filter) in enumerate(conditions_dict.items()):
        # Align label-indexed masks explicitly rather than relying on pandas'
        # implicit reindexing, which emits a UserWarning
        if (hasattr(condition_filter, 'reindex')
                and not condition_filter.index.equals(data.index)):
            condition_filter = condition_filter.reindex(data.index, fill_value=False)

        # Filter, compute statistics from one grouping, convert to degrees
        condition_data = data[condition_filter]
        by_phase = condition_data.groupby('phase_percent')[variable]
        mean_deg = np.degrees(by_phase.mean())
        std_deg = np.degrees(by_phase.std())

        # Plot
        ax.plot(mean_deg.index, mean_deg.values,
                color=colors[i], linewidth=2, label=condition_name)
        ax.fill_between(mean_deg.index,
                        mean_deg - std_deg,
                        mean_deg + std_deg,
                        alpha=0.2, color=colors[i])

    ax.set_xlabel('Gait Cycle (%)', fontsize=12)
    ax.set_ylabel('Angle (degrees)', fontsize=12)
    ax.set_title(f'{variable.replace("_", " ").title()}', fontsize=14)
    ax.legend(loc='best')
    ax.grid(True, alpha=0.3)
    ax.set_xlim(0, 100)

    return fig, ax
# Overlay the conditions for subject SUB01 on one axes and display the figure
fig, ax = overlay_conditions(
    data.loc[data['subject'] == 'SUB01'],
    'knee_flexion_angle_ipsi_rad',
    conditions,
)
plt.show()
Publication-Ready Figures¶
Professional Styling¶
def create_publication_plot(data, variable, title='', save_path=None):
    """Create a publication-style figure: mean curve with a 95% CI band.

    The band is mean +/- 1.96 * SEM, computed per ``phase_percent`` value.
    Values in ``variable`` are converted from radians to degrees for display.

    Side effect: the font settings are written to the global
    ``plt.rcParams`` and remain in force after the call.

    Args:
        data: DataFrame with ``phase_percent`` and ``variable`` columns.
        variable: Name of the column to plot.
        title: Optional axes title; omitted when empty.
        save_path: Optional output path; when given, the figure is saved
            there at 300 DPI.

    Returns:
        Tuple ``(fig, ax)`` for the new figure and its axes.
    """
    # Set publication style
    plt.rcParams['font.family'] = 'sans-serif'
    plt.rcParams['font.sans-serif'] = ['Arial']
    plt.rcParams['font.size'] = 10

    fig, ax = plt.subplots(figsize=(8, 6), dpi=300)

    # Compute statistics from a single grouping and convert to degrees
    by_phase = data.groupby('phase_percent')[variable]
    mean_deg = np.degrees(by_phase.mean())
    sem_deg = np.degrees(by_phase.sem())  # Standard error of the mean

    # Plot with confidence interval
    ax.plot(mean_deg.index, mean_deg.values,
            'k-', linewidth=1.5, label='Mean')

    # Use SEM for tighter confidence bands in publications
    ci_half_width = 1.96 * sem_deg  # 95% CI
    ax.fill_between(mean_deg.index,
                    mean_deg - ci_half_width,
                    mean_deg + ci_half_width,
                    alpha=0.3, color='gray',
                    label='95% CI')

    # Formatting
    ax.set_xlabel('Gait Cycle (%)', fontsize=11, fontweight='bold')
    ax.set_ylabel('Angle (°)', fontsize=11, fontweight='bold')
    if title:
        ax.set_title(title, fontsize=12, fontweight='bold', pad=20)

    # Clean up axes
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)
    ax.set_xlim(0, 100)

    # Add vertical lines for gait events (typical values)
    ax.axvline(x=60, color='gray', linestyle='--', alpha=0.5, linewidth=0.5)
    ax.text(60, ax.get_ylim()[0], 'Toe-off', ha='center', va='bottom', fontsize=8)

    ax.legend(frameon=False, loc='upper right')

    # Act on this figure explicitly rather than on pyplot's current figure
    fig.tight_layout()

    if save_path:
        fig.savefig(save_path, dpi=300, bbox_inches='tight')

    return fig, ax
Practice Exercises¶
Exercise 1: Multi-Variable Plot¶
Create a figure showing hip, knee, and ankle flexion on the same plot with different colors.
Exercise 2: Bilateral Comparison¶
Plot ipsilateral vs contralateral knee flexion with appropriate labeling.
Exercise 3: Statistical Bands¶
Create a plot showing mean, median, and quartiles instead of mean ± SD.
Key Takeaways¶
- Always compute statistics properly: group by phase_percent.
- Convert units for display: radians to degrees for angles.
- Use transparency: set alpha values for overlapping data.
- Label everything: axes, units, conditions.
- Save high-resolution: 300 DPI for publications.
Next Steps¶
Continue to Tutorial 4: Cycle Analysis →
Learn to extract and analyze individual gait cycles, calculate ROM, peak values, and timing metrics.