# -*- coding:utf-8 -*-
"""
Eyelink Data Parsing Module
This module is designed for parsing Eyelink ASC data. It provides functionalities to parse messages,
samples, fixations, saccades, and blinks.
"""
import pandas as pd
import numpy as np
from ..external.edfreader import read_edf
from intervaltree import Interval, IntervalTree
class EyelinkReader:
    """
    A class to read and parse Eyelink eye tracking data files.

    This class handles loading and parsing of Eyelink data files, providing methods to extract
    messages, samples, fixations, saccades and blinks. It supports customizable message formats
    and additional column specifications.

    Parameters
    ----------
    path : str
        Path to the Eyelink data file
    start_msg : str
        Common part of message marking the start of a trial. For example, if your trial start
        messages are 'TRIAL_START 1 1', 'TRIAL_START 1 2', etc., then start_msg would be
        'TRIAL_START'
    stop_msg : str
        Common part of message marking the end of a trial. For example, if your trial end
        messages are 'TRIAL_END 1 1', 'TRIAL_END 1 2', etc., then stop_msg would be 'TRIAL_END'
    msg_format : dict
        Dictionary specifying the format of messages. The messages will be parsed based on this
        format. Example: {'marker': str, 'event': str, 'block': int, 'trial': int}
    delimiter : str
        Character used to separate message components. For example, if messages are formatted
        as 'TRIAL_END 1 1', the delimiter would be ' '.
    add_cols : dict, optional
        Additional columns to add to output DataFrames. The dictionary should be in the format
        {'column_name': column_data}. For example, to add a column 'subject' with value 'S01'
        to all rows, use {'subject': 'S01'}.
    progress_bar : bool, optional
        If True, shows a progress bar while reading the data file. Default is True.

    Attributes
    ----------
    data : pd.DataFrame
        Raw unformatted Eyelink data
    messages : pd.DataFrame
        Extracted messages from the data file
    metadata : dict
        Metadata from the Eyelink data file

    Examples
    --------
    >>> reader = EyelinkReader(
    ...     path='subject01.asc',
    ...     start_msg='TRIAL_START',
    ...     stop_msg='TRIAL_END',
    ...     msg_format={'marker': str, 'event': str, 'block': int, 'trial': int},
    ...     delimiter=' '
    ... )
    """

    def __init__(self, path, start_msg, stop_msg, msg_format, delimiter,
                 add_cols=None, progress_bar=True):
        """
        Initialize EyelinkReader for processing eye tracking data.

        The data file is read immediately: ``data``, ``metadata`` and ``messages`` are
        populated by the constructor.

        Parameters
        ----------
        path : str
            Path to the Eyelink data file
        start_msg : str
            Common part of message marking the start of a trial. For example: 'TRIAL_START'
        stop_msg : str
            Common part of message marking the end of a trial. For example: 'TRIAL_END'
        msg_format : dict
            Dictionary specifying the format of event markers. For example:
            {'marker': str, 'event': str, 'block': int, 'trial': int}
        delimiter : str
            Character used to separate message components. For example: ' '
        add_cols : dict, optional
            Additional columns to add to output DataFrames. For example:
            {'subject': 'S01', 'session': 1}
        progress_bar : bool, optional
            If True, shows a progress bar while reading the data file. Default is True.
        """
        self.path = path
        self.start_msg = start_msg
        self.stop_msg = stop_msg
        self.msg_format = msg_format
        self.delimiter = delimiter
        self.add_cols = add_cols
        self.data, self.metadata = self.parse_eyelink_data(progress_bar)
        self.messages = self.get_messages()

    def parse_eyelink_data(self, progress_bar):
        """
        Loads and parses raw Eyelink data from the specified file. A wrapper for read_edf function.

        This method reads the Eyelink data file and extracts both the data and metadata.

        Parameters
        ----------
        progress_bar : bool
            Forwarded to ``read_edf``; if True, a progress bar is shown while reading.

        Returns
        -------
        tuple
            A tuple containing:
            - pd.DataFrame
                The parsed Eyelink data
            - dict
                Metadata from the Eyelink file

        Notes
        -----
        The read_edf function is adapted from the pygaze package
        https://github.com/esdalmaijer/PyGazeAnalyser
        """
        data, metadata = read_edf(self.path, start=self.start_msg, stop=self.stop_msg,
                                  progress_bar=progress_bar)
        return pd.DataFrame(data), metadata

    # ------------------------------------------------------------------
    # Private helpers shared by the get_* methods
    # ------------------------------------------------------------------
    def _collect_events(self, key, columns):
        """Flatten the per-trial event lists stored under ``key`` into one DataFrame."""
        rows = [event for trial in self.data.events for event in trial[key]]
        return pd.DataFrame(rows, columns=columns)

    @staticmethod
    def _drop_pre_message(df):
        """Keep only rows whose ``starttime`` is not earlier than their ``msgtime``."""
        keep = df['starttime'].astype(float) >= df['msgtime'].astype(float)
        return df[keep].reset_index(drop=True)

    def _parse_message_columns(self, df, source):
        """Split column ``source`` on the delimiter and add one typed column per msg_format key."""
        parts = df[source].str.split(pat=self.delimiter, expand=True)
        if df.empty:
            # With no rows, str.split(expand=True) yields a frame without columns,
            # so positional lookups would raise KeyError. Provide empty placeholders.
            parts = parts.reindex(columns=range(len(self.msg_format)))
        for position, (name, dtype) in enumerate(self.msg_format.items()):
            df[name] = parts[position].astype(dtype)
        return df

    def _with_add_cols(self, df):
        """Return ``df`` with the user-supplied ``add_cols`` appended (if any)."""
        if self.add_cols:
            return df.assign(**self.add_cols)
        return df

    @staticmethod
    def _nullable(df):
        """Convert columns to pandas dtypes that support pd.NA."""
        return df.convert_dtypes(convert_string=True, convert_integer=True,
                                 convert_boolean=True, convert_floating=True)

    # ------------------------------------------------------------------
    # Public extraction API
    # ------------------------------------------------------------------
    def get_messages(self):
        """
        Extract and process marker events from the Eyelink dataset.

        This method extracts all message events from the data and parses them according
        to the specified message format and delimiter.

        Returns
        -------
        pd.DataFrame
            DataFrame sorted by ``trackertime`` containing processed message data with columns:
            - id : int
                Trial identifier (position of the trial in ``data``)
            - trackertime : float
                Eye tracker timestamps
            - message : str
                Raw message string (surrounding whitespace stripped)
            - Additional columns to store parsed message parts based on msg_format specification.
            - Additional columns from self.add_cols are added if specified.

        Notes
        -----
        Messages are split using the specified delimiter and parsed according
        to the data types specified in msg_format.
        """
        msg_list = [(trial_id, time, msg.strip())
                    for trial_id, event in enumerate(self.data.events)
                    if 'msg' in event
                    for time, msg in event['msg']]
        df = pd.DataFrame(msg_list, columns=['id', 'trackertime', 'message'])
        df = self._parse_message_columns(df, 'message')
        df = self._with_add_cols(df)
        return df.sort_values('trackertime')

    def get_samples(self, parse_messages=True):
        """
        Extract and process raw eye tracking samples from the dataset.

        This method extracts all sample data points from the Eyelink recording,
        including gaze position, pupil size, and associated messages.

        Parameters
        ----------
        parse_messages : bool, optional
            If True, parses the associated messages according to the predefined
            message format. Default is True.

        Returns
        -------
        pd.DataFrame
            DataFrame containing processed sample data with columns:
            - trialtime : float
                Trial timestamps
            - trackertime : float
                Eye tracker timestamps
            - x : float
                X coordinates of gaze position
            - y : float
                Y coordinates of gaze position
            - pp : float
                Pupil size measurements (arbitrary unit; measurement unit [area/diameter]
                depends on recording setting)
            - msg : str
                Raw message strings
            - msgtime : float
                Message timestamps
            - Additional columns from message parsing if parse_messages=True.
            - Additional columns from self.add_cols if specified.

        Notes
        -----
        Columns are converted to the appropriate data type that supports pd.NA.
        """
        # Output column -> column of ``self.data`` holding one array per trial.
        # 'size' must be looked up by key: ``DataFrame.size`` is a built-in attribute.
        sources = {
            'trialtime': 'time',
            'trackertime': 'trackertime',
            'x': 'x',
            'y': 'y',
            'pp': 'size',
            'msg': 'last_msg',
            'msgtime': 'last_msg_time',
        }
        df = pd.DataFrame({
            target: np.concatenate(self.data[source].tolist())
            for target, source in sources.items()
        })
        if parse_messages:
            df = self._parse_message_columns(df, 'msg')
        df = self._with_add_cols(df)
        return self._nullable(df)

    def get_fixations(self, strict=True, parse_messages=True):
        """
        Extract and process fixation events from the dataset.

        This method extracts all fixation events from the Eyelink recording,
        including their duration, position, and associated messages.

        Parameters
        ----------
        strict : bool, optional
            If True, removes "bridge" fixations: fixations that started before their
            associated message (start time earlier than the message timestamp).
            Default is True.
        parse_messages : bool, optional
            If True, parses the associated messages according to the predefined
            message format. Default is True.

        Returns
        -------
        pd.DataFrame
            DataFrame containing processed fixation data with columns:
            - eye : str
                Eye identifier (left/right)
            - starttime : float
                Start time of fixation
            - endtime : float
                End time of fixation
            - duration : float
                Duration of fixation in milliseconds
            - endx : float
                X-coordinate at end of fixation
            - endy : float
                Y-coordinate at end of fixation
            - msg : str
                Raw message string
            - msgtime : float
                Message timestamp
            - Additional columns from message parsing if parse_messages=True.
            - Additional columns from self.add_cols if specified.

        Notes
        -----
        Columns are converted to the appropriate data type that supports pd.NA.
        If strict=True, fixations starting before their associated trial message
        are removed from the output.
        """
        df = self._collect_events(
            'Efix',
            ['eye', 'starttime', 'endtime', 'duration', 'endx', 'endy', 'msg', 'msgtime'],
        )
        if strict:
            df = self._drop_pre_message(df)
        if parse_messages:
            df = self._parse_message_columns(df, 'msg')
        df = self._with_add_cols(df)
        return self._nullable(df)

    def get_saccades(self, strict=True, remove_blinks=True, srt=True, parse_messages=True):
        """
        Extract and process saccadic eye movements from the dataset.

        This method extracts all saccade information from the dataset,
        with the option to remove saccades that overlap with blinks and calculate
        saccade reaction times.

        Parameters
        ----------
        strict : bool, optional
            If True, removes "bridge" saccades: saccades that started before their
            associated message (start time earlier than the message timestamp).
            Default is True.
        remove_blinks : bool, optional
            If True, removes saccades that overlap with blink periods. This is recommended
            for Eyelink data as Eyelink embeds a blink inside a saccade. Default is True.
        srt : bool, optional
            If True, calculates saccade reaction time (srt) as the difference between
            saccade start time and message timestamp. Default is True.
        parse_messages : bool, optional
            If True, parses the associated messages according to the predefined
            message format. Default is True.

        Returns
        -------
        pd.DataFrame
            DataFrame containing processed saccade data with columns:
            - eye : str
                Eye identifier (left/right)
            - starttime : float
                Start time of saccade
            - endtime : float
                End time of saccade
            - duration : float
                Duration of saccade in milliseconds
            - startx : float
                Starting X coordinate
            - starty : float
                Starting Y coordinate
            - endx : float
                Ending X coordinate
            - endy : float
                Ending Y coordinate
            - ampl : float
                Amplitude of saccade in degrees
            - pv : float
                Peak velocity in degrees/second
            - msg : str
                Associated message
            - msgtime : float
                Message timestamp
            - srt : float
                Saccade reaction time (if srt=True)
            - Additional columns from message parsing if parse_messages=True.
            - Additional columns from self.add_cols if specified.

        Notes
        -----
        - Columns are converted to the appropriate data type that supports pd.NA.
        - If remove_blinks=True, saccades overlapping with blinks are removed
        - If strict=True, saccades starting before trial message are removed
        - Saccade reaction time (srt) is calculated as starttime - msgtime
        """
        df = self._collect_events(
            'Esac',
            ['eye', 'starttime', 'endtime', 'duration', 'startx', 'starty',
             'endx', 'endy', 'ampl', 'pv', 'msg', 'msgtime'],
        )
        if remove_blinks:
            # Only blink start/end times are needed, so skip message parsing;
            # strict=False keeps every blink, including those preceding their message.
            blinks = self.get_blinks(strict=False, parse_messages=False)
            df = self._scrub_blinks(df, blinks)
        if strict:
            df = self._drop_pre_message(df)
        if srt:
            df['srt'] = df['starttime'].astype(float) - df['msgtime'].astype(float)
        if parse_messages:
            df = self._parse_message_columns(df, 'msg')
        df = self._with_add_cols(df)
        return self._nullable(df)

    def get_blinks(self, strict=True, parse_messages=True):
        """
        Extract and process blink events from the dataset.

        This method extracts all blink events from the Eyelink recording,
        including their duration and associated messages.

        Parameters
        ----------
        strict : bool, optional
            If True, removes "bridge" blinks: blinks that started before their
            associated message (start time earlier than the message timestamp).
            Default is True.
        parse_messages : bool, optional
            If True, parses the associated messages according to the predefined
            message format. Default is True.

        Returns
        -------
        pd.DataFrame
            DataFrame containing processed blink data with columns:
            - eye : str
                Eye identifier (left/right)
            - starttime : float
                Start time of blink
            - endtime : float
                End time of blink
            - duration : float
                Duration of blink in milliseconds
            - msg : str
                Message string
            - msgtime : float
                Message timestamp
            - Additional columns from message parsing if parse_messages=True.
            - Additional columns from self.add_cols if specified.

        Notes
        -----
        - Columns are converted to the appropriate data type that supports pd.NA.
        - Blinks are detected by Eyelink's algorithm.
        """
        df = self._collect_events(
            'Eblk',
            ['eye', 'starttime', 'endtime', 'duration', 'msg', 'msgtime'],
        )
        # Remove pre-message blinks
        if strict:
            df = self._drop_pre_message(df)
        if parse_messages:
            df = self._parse_message_columns(df, 'msg')
        df = self._with_add_cols(df)
        return self._nullable(df)

    def _scrub_blinks(self, sac, blk):
        """
        Filter out saccades that overlap with blinks in the dataset.

        This method creates an interval tree from blink periods and removes any
        saccades that overlap with these periods.

        Parameters
        ----------
        sac : pd.DataFrame
            DataFrame containing saccade data with 'starttime' and 'endtime' columns
        blk : pd.DataFrame
            DataFrame containing blink data with 'starttime' and 'endtime' columns

        Returns
        -------
        pd.DataFrame
            Filtered saccade DataFrame with blink-overlapping saccades removed,
            with index reset to default integer index.

        Notes
        -----
        Uses an interval tree for overlap detection between saccades and blinks.
        Blinks whose start time is not strictly before their end time are ignored.
        """
        # Create interval tree of blinks
        tree = IntervalTree()
        for start, end in zip(blk['starttime'], blk['endtime']):
            if start < end:
                tree.add(Interval(start, end))
        # A boolean ndarray (not a list) so that an empty mask is still treated as a
        # row filter; an empty list would be read as "select no columns".
        keep = np.array(
            [not tree.overlaps(start, end)
             for start, end in zip(sac['starttime'], sac['endtime'])],
            dtype=bool,
        )
        return sac[keep].reset_index(drop=True)