Load MATLAB Data with load_matfile#

This tutorial demonstrates how to use BrainTools’ load_matfile function to efficiently load and work with MATLAB .mat files in Python. The function provides a convenient wrapper around scipy.io.loadmat with enhanced functionality for neuroscience workflows.

Prerequisites#

Make sure you have the required dependencies installed:

# Install required packages if needed
# !pip install scipy numpy braintools
import numpy as np
import scipy.io as sio
from pathlib import Path
import braintools

Creating Sample Data#

Let’s create some sample MATLAB files that represent typical neuroscience data structures:

# Create a temporary directory for our examples.
# The path is relative, so the folder is created under the current working
# directory; exist_ok=True lets the tutorial be re-run without failing when
# the folder is already there.
data_dir = Path("tutorial_data")
data_dir.mkdir(exist_ok=True)
print(f"Created data directory: {data_dir}")
Created data directory: tutorial_data

Example 1: Basic Numeric Data#

# Example 1 payload: two 1-D numeric arrays plus two scalar settings.
# Keyword order is kept deliberately; it is echoed in the key listing printed below.
simple_data = dict(
    spike_times=np.array([0.123, 0.456, 0.789, 1.234]),
    trial_id=np.array([1, 1, 2, 2]),
    sampling_rate=1000.0,
    duration=2.0,
)

# Write the dictionary to disk; each key is saved as one variable in the .mat file
simple_file = data_dir.joinpath("simple_data.mat")
sio.savemat(simple_file, simple_data)
print(f"Created simple data file: {simple_file}")
print(f"Original data keys: {list(simple_data.keys())}")
Created simple data file: tutorial_data\simple_data.mat
Original data keys: ['spike_times', 'trial_id', 'sampling_rate', 'duration']

Example 2: Structured Data with Nested Fields#

# Build each MATLAB-style struct as its own dict, then collect them under the
# variable names they will have inside the .mat file.
session_struct = {
    'subject_id': 'S001',
    'date': '2024-01-15',
    'experimenter': 'Dr. Smith',
    'notes': np.array(['good recording', 'stable electrode'], dtype=object),
}

# Randomly generated placeholder recordings (values differ on every run)
neural_struct = {
    'spike_trains': np.random.poisson(5, (10, 1000)),  # 10 neurons x 1000 time bins
    'lfp': np.random.randn(1000, 4),                   # 1000 samples x 4 channels
    'timestamps': np.linspace(0, 10, 1000),            # 10 seconds of data
}

behavior_struct = {
    'trial_types': np.array(['left', 'right', 'left', 'right'], dtype=object),
    'reaction_times': np.array([0.45, 0.52, 0.41, 0.48]),
    'correct': np.array([True, True, False, True]),
}

experiment_data = {
    'session_info': session_struct,
    'neural_data': neural_struct,
    'behavior': behavior_struct,
}

# Nested dicts are written as structs; this file is additionally saved compressed
structured_file = data_dir.joinpath("experiment_data.mat")
sio.savemat(structured_file, experiment_data, do_compression=True)
print(f"Created structured data file: {structured_file}")
Created structured data file: tutorial_data\experiment_data.mat

Example 3: Cell Arrays and Mixed Data Types#

# Create data with cell arrays (common in MATLAB), building from the inside out:
# the innermost struct first, then the containers around it.
analysis_params = {
    'threshold': 0.05,
    'window_size': 100,
    'filters': np.array(['lowpass', 'highpass'], dtype=object),
}

# Trials have different lengths, so they are stored as an object array of
# separate arrays rather than as one rectangular matrix.
per_trial_values = [
    [1, 2, 3, 4, 5],
    [10, 20, 30],
    [100, 200, 300, 400],
]

mixed_data = {
    'condition_names': np.array(['baseline', 'stimulus', 'recovery'], dtype=object),
    'trial_data': np.array(
        [np.array(values) for values in per_trial_values],
        dtype=object,
    ),
    'metadata': {
        'version': 1.2,
        'params': analysis_params,
    },
}

mixed_file = data_dir.joinpath("mixed_data.mat")
sio.savemat(mixed_file, mixed_data)
print(f"Created mixed data file: {mixed_file}")
Created mixed data file: tutorial_data\mixed_data.mat

Loading Data with braintools.file.load_matfile#

Now let’s demonstrate how to load these files using BrainTools:

Basic Usage#

# Read Example 1 back through the BrainTools loader
loaded_simple = braintools.file.load_matfile(str(simple_file))

print("Loaded simple data:")
print(f"Keys: {list(loaded_simple.keys())}")

# One (name, Python type, shape) triple per entry; values that have no
# .shape attribute are reported as 'scalar'.
type_summary = [
    (key, type(val).__name__, getattr(val, 'shape', 'scalar'))
    for key, val in loaded_simple.items()
]
print(f"Data types: {type_summary}")

# Individual variables are reached by their MATLAB variable name
print(f"\nSpike times: {loaded_simple['spike_times']}")
print(f"Sampling rate: {loaded_simple['sampling_rate']}")
Loaded simple data:
Keys: ['spike_times', 'trial_id', 'sampling_rate', 'duration']
Data types: [('spike_times', 'ndarray', (4,)), ('trial_id', 'ndarray', (4,)), ('sampling_rate', 'float', 'scalar'), ('duration', 'float', 'scalar')]

Spike times: [0.123 0.456 0.789 1.234]
Sampling rate: 1000.0

Loading Structured Data#

# Load the structured data file (Example 2) with the default loader settings
loaded_structured = braintools.file.load_matfile(str(structured_file))

print("Loaded structured data:")
print(f"Top-level keys: {list(loaded_structured.keys())}")

# Access nested structures: each top-level struct is indexed by key,
# and its fields are indexed by key in turn.
session_info = loaded_structured['session_info']
print(f"\nSession info keys: {list(session_info.keys())}")
print(f"Subject ID: {session_info['subject_id']}")
print(f"Notes: {session_info['notes']}")

# Access neural data; only the array shapes are printed because the
# contents were randomly generated when the file was created.
neural_data = loaded_structured['neural_data']
print(f"\nNeural data keys: {list(neural_data.keys())}")
print(f"Spike trains shape: {neural_data['spike_trains'].shape}")
print(f"LFP shape: {neural_data['lfp'].shape}")

# Access behavior data
behavior = loaded_structured['behavior']
print(f"\nBehavior data keys: {list(behavior.keys())}")
print(f"Trial types: {behavior['trial_types']}")
print(f"Reaction times: {behavior['reaction_times']}")
Loaded structured data:
Top-level keys: ['session_info', 'neural_data', 'behavior']

Session info keys: ['subject_id', 'date', 'experimenter', 'notes']
Subject ID: S001
Notes: ['good recording', 'stable electrode']

Neural data keys: ['spike_trains', 'lfp', 'timestamps']
Spike trains shape: (10, 1000)
LFP shape: (1000, 4)

Behavior data keys: ['trial_types', 'reaction_times', 'correct']
Trial types: ['left', 'right', 'left', 'right']
Reaction times: [0.45 0.52 0.41 0.48]

Handling Cell Arrays and Mixed Data#

# Read Example 3 back through the BrainTools loader
loaded_mixed = braintools.file.load_matfile(str(mixed_file))

print("Loaded mixed data:")
print(f"Keys: {list(loaded_mixed.keys())}")

# Cell arrays are converted to Python lists
condition_names = loaded_mixed['condition_names']
print(f"\nCondition names (originally cell array): {condition_names}")
print(f"Type: {type(condition_names)}")

# Trial data with varying lengths: one array per trial
trial_data = loaded_mixed['trial_data']
print("\nTrial data (originally cell array):")
for trial_idx, trial_values in enumerate(trial_data):
    print(f"  Trial {trial_idx}: {trial_values} (shape: {trial_values.shape})")

# Nested metadata: a struct inside a struct is reached by chaining keys
metadata = loaded_mixed['metadata']
print(f"\nMetadata: {metadata}")
params = metadata['params']
print(f"Nested params: {params}")
print(f"Filters: {params['filters']}")
Loaded mixed data:
Keys: ['condition_names', 'trial_data', 'metadata']

Condition names (originally cell array): ['baseline', 'stimulus', 'recovery']
Type: <class 'list'>

Trial data (originally cell array):
  Trial 0: [1 2 3 4 5] (shape: (5,))
  Trial 1: [10 20 30] (shape: (3,))
  Trial 2: [100 200 300 400] (shape: (4,))

Metadata: {'version': 1.2, 'params': {'threshold': 0.05, 'window_size': 100, 'filters': ['lowpass', 'highpass']}}
Nested params: {'threshold': 0.05, 'window_size': 100, 'filters': ['lowpass', 'highpass']}
Filters: ['lowpass', 'highpass']

Advanced Features#

Including Header Information#

# Load with header information included.
# NOTE(review): the flag value reads backwards relative to the line above.
# In the output recorded for this tutorial, the earlier default call returns
# only the data keys, while this header_info=False call also returns
# '__header__', '__version__' and '__globals__'. Confirm the flag's meaning
# against the installed braintools version before changing it.
loaded_with_header = braintools.file.load_matfile(str(simple_file), header_info=False)

print("Keys with header info:")
print(list(loaded_with_header.keys()))

# Check the header information; the membership tests keep this cell from
# raising KeyError when the header entries are absent.
if '__header__' in loaded_with_header:
    header = loaded_with_header['__header__']
    print(f"\nHeader info: {header}")
    
# NOTE(review): labelled "MATLAB version" in the printout, but the recorded
# value is 1.0 next to a "MATLAB 5.0 MAT-file" header -- presumably this is a
# file-format version rather than the MATLAB release; confirm.
if '__version__' in loaded_with_header:
    version = loaded_with_header['__version__']
    print(f"MATLAB version: {version}")
Keys with header info:
['__header__', '__version__', '__globals__', 'spike_times', 'trial_id', 'sampling_rate', 'duration']

Header info: b'MATLAB 5.0 MAT-file Platform: nt, Created on: Fri Sep 26 11:35:32 2025'
MATLAB version: 1.0

Passing Additional Arguments to scipy.io.loadmat#

# Load with custom scipy.io.loadmat parameters, collected in one place
# and passed through as keyword arguments.
loadmat_options = {
    'squeeze_me': False,       # Don't squeeze singleton dimensions
    'struct_as_record': True,  # Load structs as record arrays instead of objects
}
loaded_custom = braintools.file.load_matfile(str(structured_file), **loadmat_options)

print("Loaded with custom parameters:")
print(f"Keys: {list(loaded_custom.keys())}")

# Compare the difference in structure: field names are only printed when
# the loaded struct exposes a dtype.
session_info_custom = loaded_custom['session_info']
print(f"\nSession info type with struct_as_record=True: {type(session_info_custom)}")
if hasattr(session_info_custom, 'dtype'):
    print(f"Field names: {session_info_custom.dtype.names}")
Loaded with custom parameters:
Keys: ['session_info', 'neural_data', 'behavior']

Session info type with struct_as_record=True: <class 'numpy.ndarray'>
Field names: ('subject_id', 'date', 'experimenter', 'notes')

Comparison with Standard scipy.io.loadmat#

# Side-by-side look at what plain SciPy and BrainTools return for the same file
print("Comparison: BrainTools vs. Standard SciPy")
print("=" * 50)

# Plain SciPy: structs as attribute-style objects, singleton dimensions squeezed
scipy_loaded = sio.loadmat(str(structured_file), struct_as_record=False, squeeze_me=True)

# BrainTools with its default settings
bt_loaded = braintools.file.load_matfile(str(structured_file))

print(f"SciPy keys: {list(scipy_loaded.keys())}")
print(f"BrainTools keys: {list(bt_loaded.keys())}")

# Same struct, two representations
scipy_session = scipy_loaded['session_info']
bt_session = bt_loaded['session_info']

print(f"\nSciPy session_info type: {type(scipy_session)}")
print(f"BrainTools session_info type: {type(bt_session)}")

# The SciPy object lists its fields in _fieldnames; the BrainTools result is keyed
if hasattr(scipy_session, '_fieldnames'):
    print(f"SciPy session_info fields: {scipy_session._fieldnames}")
print(f"BrainTools session_info keys: {list(bt_session.keys())}")

# Same field, reached by attribute on the SciPy side and by key on the BrainTools side
scipy_notes = scipy_session.notes
print(f"\nSciPy notes: {scipy_notes} (type: {type(scipy_notes)})")
bt_notes = bt_session['notes']
print(f"BrainTools notes: {bt_notes} (type: {type(bt_notes)})")
Comparison: BrainTools vs. Standard SciPy
==================================================
SciPy keys: ['__header__', '__version__', '__globals__', 'session_info', 'neural_data', 'behavior']
BrainTools keys: ['session_info', 'neural_data', 'behavior']

SciPy session_info type: <class 'scipy.io.matlab._mio5_params.mat_struct'>
BrainTools session_info type: <class 'dict'>
SciPy session_info fields: ['subject_id', 'date', 'experimenter', 'notes']
BrainTools session_info keys: ['subject_id', 'date', 'experimenter', 'notes']

SciPy notes: ['good recording' 'stable electrode'] (type: <class 'numpy.ndarray'>)
BrainTools notes: ['good recording', 'stable electrode'] (type: <class 'list'>)
# Clean up tutorial files: remove the scratch directory and every .mat file
# written into it during the tutorial.
import shutil

print("Cleaning up tutorial files...")
# Guarded so the cell can be re-run after the folder has already been removed
if data_dir.exists():
    shutil.rmtree(data_dir)
    print(f"Removed {data_dir} directory")
    
print("Tutorial complete!")
Cleaning up tutorial files...
Removed tutorial_data directory
Tutorial complete!