{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Load MATLAB Data with `load_matfile`\n",
    "\n",
    "This tutorial demonstrates how to use BrainTools' `load_matfile` function to efficiently load and work with MATLAB `.mat` files in Python. The function provides a convenient wrapper around `scipy.io.loadmat` with enhanced functionality for neuroscience workflows."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Prerequisites\n",
    "\n",
    "Make sure you have the required dependencies installed:"
   ]
  },
  {
   "cell_type": "code",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-09-26T03:35:27.521412Z",
     "start_time": "2025-09-26T03:35:27.517428Z"
    }
   },
   "source": [
    "# Install required packages if needed.\n",
    "# %pip (unlike !pip) installs into the environment of the running kernel.\n",
    "# %pip install scipy numpy braintools"
   ],
   "outputs": [],
   "execution_count": 1
  },
  {
   "cell_type": "code",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-09-26T03:35:30.572776Z",
     "start_time": "2025-09-26T03:35:27.527416Z"
    }
   },
   "source": [
    "import shutil\n",
    "from pathlib import Path\n",
    "\n",
    "import numpy as np\n",
    "import scipy.io as sio\n",
    "\n",
    "import braintools"
   ],
   "outputs": [],
   "execution_count": 2
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Creating Sample Data\n",
    "\n",
    "Let's create some sample MATLAB files that represent typical neuroscience data structures:"
   ]
  },
  {
   "cell_type": "code",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-09-26T03:35:32.478329Z",
     "start_time": "2025-09-26T03:35:32.474975Z"
    }
   },
   "source": [
    "# Scratch folder (relative to the current working directory) for the\n",
    "# .mat files written by this tutorial\n",
    "data_dir = Path(\"tutorial_data\")\n",
    "data_dir.mkdir(exist_ok=True)  # a pre-existing folder is not an error\n",
    "print(\"Created data directory: {}\".format(data_dir))"
   ],
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Created data directory: tutorial_data\n"
     ]
    }
   ],
   "execution_count": 3
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Example 1: Basic Numeric Data"
   ]
  },
  {
   "cell_type": "code",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-09-26T03:35:32.603241Z",
     "start_time": "2025-09-26T03:35:32.491937Z"
    }
   },
   "source": [
    "# Four flat variables: two 1-D arrays plus two float scalars\n",
    "simple_data = dict(\n",
    "    spike_times=np.array([0.123, 0.456, 0.789, 1.234]),\n",
    "    trial_id=np.array([1, 1, 2, 2]),\n",
    "    sampling_rate=1000.0,\n",
    "    duration=2.0,\n",
    ")\n",
    "\n",
    "# Write the dict to disk; each key becomes one variable in the .mat file\n",
    "simple_file = data_dir / \"simple_data.mat\"\n",
    "sio.savemat(simple_file, simple_data)\n",
    "print(f\"Created simple data file: {simple_file}\")\n",
    "print(f\"Original data keys: {[*simple_data]}\")"
   ],
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Created simple data file: tutorial_data\\simple_data.mat\n",
      "Original data keys: ['spike_times', 'trial_id', 'sampling_rate', 'duration']\n"
     ]
    }
   ],
   "execution_count": 4
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Example 2: Structured Data with Nested Fields"
   ]
  },
  {
   "cell_type": "code",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-09-26T03:35:32.662718Z",
     "start_time": "2025-09-26T03:35:32.609798Z"
    }
   },
   "source": [
    "# Create a more complex .mat file with nested structures.\n",
    "# A seeded generator keeps the synthetic recordings identical on every run.\n",
    "rng = np.random.default_rng(42)\n",
    "\n",
    "experiment_data = {\n",
    "    'session_info': {\n",
    "        'subject_id': 'S001',\n",
    "        'date': '2024-01-15',\n",
    "        'experimenter': 'Dr. Smith',\n",
    "        'notes': np.array(['good recording', 'stable electrode'], dtype=object)\n",
    "    },\n",
    "    'neural_data': {\n",
    "        'spike_trains': rng.poisson(5, (10, 1000)),  # 10 neurons, 1000 time bins\n",
    "        'lfp': rng.standard_normal((1000, 4)),  # 1000 samples, 4 channels\n",
    "        'timestamps': np.linspace(0, 10, 1000)  # 10 seconds of data\n",
    "    },\n",
    "    'behavior': {\n",
    "        'trial_types': np.array(['left', 'right', 'left', 'right'], dtype=object),\n",
    "        'reaction_times': np.array([0.45, 0.52, 0.41, 0.48]),\n",
    "        'correct': np.array([True, True, False, True])\n",
    "    }\n",
    "}\n",
    "\n",
    "# do_compression=True asks savemat to compress the matrices it writes\n",
    "structured_file = data_dir / \"experiment_data.mat\"\n",
    "sio.savemat(structured_file, experiment_data, do_compression=True)\n",
    "print(f\"Created structured data file: {structured_file}\")"
   ],
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Created structured data file: tutorial_data\\experiment_data.mat\n"
     ]
    }
   ],
   "execution_count": 5
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Example 3: Cell Arrays and Mixed Data Types"
   ]
  },
  {
   "cell_type": "code",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-09-26T03:35:32.710748Z",
     "start_time": "2025-09-26T03:35:32.669734Z"
    }
   },
   "source": [
    "# Data shaped like MATLAB cell arrays: object-dtype arrays of strings\n",
    "# and of numeric vectors with unequal lengths\n",
    "mixed_data = dict(\n",
    "    condition_names=np.array(['baseline', 'stimulus', 'recovery'], dtype=object),\n",
    "    trial_data=np.array(\n",
    "        [np.array(values) for values in ([1, 2, 3, 4, 5], [10, 20, 30], [100, 200, 300, 400])],\n",
    "        dtype=object,\n",
    "    ),\n",
    "    metadata=dict(\n",
    "        version=1.2,\n",
    "        params=dict(\n",
    "            threshold=0.05,\n",
    "            window_size=100,\n",
    "            filters=np.array(['lowpass', 'highpass'], dtype=object),\n",
    "        ),\n",
    "    ),\n",
    ")\n",
    "\n",
    "# Save with savemat's default settings\n",
    "mixed_file = data_dir / \"mixed_data.mat\"\n",
    "sio.savemat(mixed_file, mixed_data)\n",
    "print(f\"Created mixed data file: {mixed_file}\")"
   ],
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Created mixed data file: tutorial_data\\mixed_data.mat\n"
     ]
    }
   ],
   "execution_count": 6
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Loading Data with `braintools.file.load_matfile`\n",
    "\n",
    "Now let's demonstrate how to load these files using BrainTools:"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Basic Usage"
   ]
  },
  {
   "cell_type": "code",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-09-26T03:35:32.771448Z",
     "start_time": "2025-09-26T03:35:32.717800Z"
    }
   },
   "source": [
    "# Read the flat file back; the result is accessed like a regular dict\n",
    "loaded_simple = braintools.file.load_matfile(str(simple_file))\n",
    "\n",
    "print(\"Loaded simple data:\")\n",
    "print(f\"Keys: {[*loaded_simple]}\")\n",
    "# One (name, type, shape) triple per variable; values without a .shape are reported as 'scalar'\n",
    "print(f\"Data types: {[(name, type(value).__name__, getattr(value, 'shape', 'scalar')) for name, value in loaded_simple.items()]}\")\n",
    "\n",
    "# Individual variables are looked up by name\n",
    "print(f\"\\nSpike times: {loaded_simple['spike_times']}\")\n",
    "print(f\"Sampling rate: {loaded_simple['sampling_rate']}\")"
   ],
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Loaded simple data:\n",
      "Keys: ['spike_times', 'trial_id', 'sampling_rate', 'duration']\n",
      "Data types: [('spike_times', 'ndarray', (4,)), ('trial_id', 'ndarray', (4,)), ('sampling_rate', 'float', 'scalar'), ('duration', 'float', 'scalar')]\n",
      "\n",
      "Spike times: [0.123 0.456 0.789 1.234]\n",
      "Sampling rate: 1000.0\n"
     ]
    }
   ],
   "execution_count": 7
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Loading Structured Data"
   ]
  },
  {
   "cell_type": "code",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-09-26T03:35:32.827393Z",
     "start_time": "2025-09-26T03:35:32.777883Z"
    }
   },
   "source": [
    "# Read the nested file back; each struct is indexed like a dict below\n",
    "loaded_structured = braintools.file.load_matfile(str(structured_file))\n",
    "\n",
    "print(\"Loaded structured data:\")\n",
    "print(f\"Top-level keys: {[*loaded_structured]}\")\n",
    "\n",
    "# Recording-session description\n",
    "session_info = loaded_structured['session_info']\n",
    "print(f\"\\nSession info keys: {[*session_info]}\")\n",
    "print(f\"Subject ID: {session_info['subject_id']}\")\n",
    "print(f\"Notes: {session_info['notes']}\")\n",
    "\n",
    "# Neural recordings: only the array shapes are shown\n",
    "neural_data = loaded_structured['neural_data']\n",
    "print(f\"\\nNeural data keys: {[*neural_data]}\")\n",
    "print(f\"Spike trains shape: {neural_data['spike_trains'].shape}\")\n",
    "print(f\"LFP shape: {neural_data['lfp'].shape}\")\n",
    "\n",
    "# Behavioural measurements\n",
    "behavior = loaded_structured['behavior']\n",
    "print(f\"\\nBehavior data keys: {[*behavior]}\")\n",
    "print(f\"Trial types: {behavior['trial_types']}\")\n",
    "print(f\"Reaction times: {behavior['reaction_times']}\")"
   ],
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Loaded structured data:\n",
      "Top-level keys: ['session_info', 'neural_data', 'behavior']\n",
      "\n",
      "Session info keys: ['subject_id', 'date', 'experimenter', 'notes']\n",
      "Subject ID: S001\n",
      "Notes: ['good recording', 'stable electrode']\n",
      "\n",
      "Neural data keys: ['spike_trains', 'lfp', 'timestamps']\n",
      "Spike trains shape: (10, 1000)\n",
      "LFP shape: (1000, 4)\n",
      "\n",
      "Behavior data keys: ['trial_types', 'reaction_times', 'correct']\n",
      "Trial types: ['left', 'right', 'left', 'right']\n",
      "Reaction times: [0.45 0.52 0.41 0.48]\n"
     ]
    }
   ],
   "execution_count": 8
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Handling Cell Arrays and Mixed Data"
   ]
  },
  {
   "cell_type": "code",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-09-26T03:35:32.887265Z",
     "start_time": "2025-09-26T03:35:32.834415Z"
    }
   },
   "source": [
    "# Read the cell-array file back\n",
    "loaded_mixed = braintools.file.load_matfile(str(mixed_file))\n",
    "\n",
    "print(\"Loaded mixed data:\")\n",
    "print(f\"Keys: {[*loaded_mixed]}\")\n",
    "\n",
    "# The object array of strings comes back as a plain Python list\n",
    "condition_names = loaded_mixed['condition_names']\n",
    "print(f\"\\nCondition names (originally cell array): {condition_names}\")\n",
    "print(f\"Type: {type(condition_names)}\")\n",
    "\n",
    "# Entries keep their individual lengths, so they are printed one by one\n",
    "trial_data = loaded_mixed['trial_data']\n",
    "print(\"\\nTrial data (originally cell array):\")\n",
    "for i, trial in enumerate(trial_data):\n",
    "    print(\"  Trial {}: {} (shape: {})\".format(i, trial, trial.shape))\n",
    "\n",
    "# Structs nested inside structs are reached by chained key lookups\n",
    "metadata = loaded_mixed['metadata']\n",
    "print(f\"\\nMetadata: {metadata}\")\n",
    "print(f\"Nested params: {metadata['params']}\")\n",
    "print(f\"Filters: {metadata['params']['filters']}\")"
   ],
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Loaded mixed data:\n",
      "Keys: ['condition_names', 'trial_data', 'metadata']\n",
      "\n",
      "Condition names (originally cell array): ['baseline', 'stimulus', 'recovery']\n",
      "Type: <class 'list'>\n",
      "\n",
      "Trial data (originally cell array):\n",
      "  Trial 0: [1 2 3 4 5] (shape: (5,))\n",
      "  Trial 1: [10 20 30] (shape: (3,))\n",
      "  Trial 2: [100 200 300 400] (shape: (4,))\n",
      "\n",
      "Metadata: {'version': 1.2, 'params': {'threshold': 0.05, 'window_size': 100, 'filters': ['lowpass', 'highpass']}}\n",
      "Nested params: {'threshold': 0.05, 'window_size': 100, 'filters': ['lowpass', 'highpass']}\n",
      "Filters: ['lowpass', 'highpass']\n"
     ]
    }
   ],
   "execution_count": 9
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Advanced Features"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Including Header Information"
   ]
  },
  {
   "cell_type": "code",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-09-26T03:35:32.948235Z",
     "start_time": "2025-09-26T03:35:32.895270Z"
    }
   },
   "source": [
    "# Load with header information included\n",
    "loaded_with_header = braintools.file.load_matfile(str(simple_file), header_info=False)\n",
    "\n",
    "print(\"Keys with header info:\")\n",
    "print(list(loaded_with_header.keys()))\n",
    "\n",
    "# Check the header information\n",
    "if '__header__' in loaded_with_header:\n",
    "    header = loaded_with_header['__header__']\n",
    "    print(f\"\\nHeader info: {header}\")\n",
    "    \n",
    "if '__version__' in loaded_with_header:\n",
    "    version = loaded_with_header['__version__']\n",
    "    print(f\"MATLAB version: {version}\")"
   ],
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Keys with header info:\n",
      "['__header__', '__version__', '__globals__', 'spike_times', 'trial_id', 'sampling_rate', 'duration']\n",
      "\n",
      "Header info: b'MATLAB 5.0 MAT-file Platform: nt, Created on: Fri Sep 26 11:35:32 2025'\n",
      "MATLAB version: 1.0\n"
     ]
    }
   ],
   "execution_count": 10
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Passing Additional Arguments to scipy.io.loadmat"
   ]
  },
  {
   "cell_type": "code",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-09-26T03:35:33.002606Z",
     "start_time": "2025-09-26T03:35:32.953744Z"
    }
   },
   "source": [
    "# Extra keyword arguments are handed on to scipy.io.loadmat\n",
    "loadmat_options = dict(\n",
    "    squeeze_me=False,  # Don't squeeze singleton dimensions\n",
    "    struct_as_record=True,  # Load structs as record arrays instead of objects\n",
    ")\n",
    "loaded_custom = braintools.file.load_matfile(str(structured_file), **loadmat_options)\n",
    "\n",
    "print(\"Loaded with custom parameters:\")\n",
    "print(f\"Keys: {[*loaded_custom]}\")\n",
    "\n",
    "# With these options the struct is a NumPy array whose dtype lists the fields\n",
    "session_info_custom = loaded_custom['session_info']\n",
    "print(f\"\\nSession info type with struct_as_record=True: {type(session_info_custom)}\")\n",
    "if hasattr(session_info_custom, 'dtype'):\n",
    "    print(f\"Field names: {session_info_custom.dtype.names}\")"
   ],
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Loaded with custom parameters:\n",
      "Keys: ['session_info', 'neural_data', 'behavior']\n",
      "\n",
      "Session info type with struct_as_record=True: <class 'numpy.ndarray'>\n",
      "Field names: ('subject_id', 'date', 'experimenter', 'notes')\n"
     ]
    }
   ],
   "execution_count": 11
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Comparison with Standard scipy.io.loadmat"
   ]
  },
  {
   "cell_type": "code",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-09-26T03:35:33.064024Z",
     "start_time": "2025-09-26T03:35:33.008645Z"
    }
   },
   "source": [
    "# Load one file with both loaders and print the results side by side\n",
    "print(\"Comparison: BrainTools vs. Standard SciPy\")\n",
    "print(50 * \"=\")\n",
    "\n",
    "# Plain SciPy call\n",
    "scipy_loaded = sio.loadmat(str(structured_file), squeeze_me=True, struct_as_record=False)\n",
    "\n",
    "# BrainTools call with its default arguments\n",
    "bt_loaded = braintools.file.load_matfile(str(structured_file))\n",
    "\n",
    "print(f\"SciPy keys: {[*scipy_loaded]}\")\n",
    "print(f\"BrainTools keys: {[*bt_loaded]}\")\n",
    "\n",
    "# The same struct as returned by each loader\n",
    "scipy_session, bt_session = scipy_loaded['session_info'], bt_loaded['session_info']\n",
    "\n",
    "print(f\"\\nSciPy session_info type: {type(scipy_session)}\")\n",
    "print(f\"BrainTools session_info type: {type(bt_session)}\")\n",
    "\n",
    "# SciPy exposes the fields as attributes, BrainTools as dict keys\n",
    "if hasattr(scipy_session, '_fieldnames'):\n",
    "    print(f\"SciPy session_info fields: {scipy_session._fieldnames}\")\n",
    "print(f\"BrainTools session_info keys: {[*bt_session]}\")\n",
    "\n",
    "# Compare how the 'notes' field is represented\n",
    "print(f\"\\nSciPy notes: {scipy_session.notes} (type: {type(scipy_session.notes)})\")\n",
    "print(f\"BrainTools notes: {bt_session['notes']} (type: {type(bt_session['notes'])})\")"
   ],
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Comparison: BrainTools vs. Standard SciPy\n",
      "==================================================\n",
      "SciPy keys: ['__header__', '__version__', '__globals__', 'session_info', 'neural_data', 'behavior']\n",
      "BrainTools keys: ['session_info', 'neural_data', 'behavior']\n",
      "\n",
      "SciPy session_info type: <class 'scipy.io.matlab._mio5_params.mat_struct'>\n",
      "BrainTools session_info type: <class 'dict'>\n",
      "SciPy session_info fields: ['subject_id', 'date', 'experimenter', 'notes']\n",
      "BrainTools session_info keys: ['subject_id', 'date', 'experimenter', 'notes']\n",
      "\n",
      "SciPy notes: ['good recording' 'stable electrode'] (type: <class 'numpy.ndarray'>)\n",
      "BrainTools notes: ['good recording', 'stable electrode'] (type: <class 'list'>)\n"
     ]
    }
   ],
   "execution_count": 12
  },
  {
   "cell_type": "code",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-09-26T03:35:33.720020Z",
     "start_time": "2025-09-26T03:35:33.626113Z"
    }
   },
   "source": [
    "# Remove the scratch directory and every .mat file written above\n",
    "print(\"Cleaning up tutorial files...\")\n",
    "if data_dir.exists():\n",
    "    shutil.rmtree(data_dir)\n",
    "    print(f\"Removed {data_dir} directory\")\n",
    "\n",
    "print(\"Tutorial complete!\")"
   ],
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Cleaning up tutorial files...\n",
      "Removed tutorial_data directory\n",
      "Tutorial complete!\n"
     ]
    }
   ],
   "execution_count": 18
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
