Source code for ipfx.dataset.ephys_data_set

from typing import (
    Optional, List, Dict, Tuple, Collection, Sequence, Union
)
import logging
from collections import defaultdict
import copy as cp

import pandas as pd
import numpy as np

from allensdk.deprecated import deprecated

from ipfx.dataset.ephys_data_interface import EphysDataInterface
from ipfx.stimulus import StimulusOntology
from ipfx.sweep import Sweep, SweepSet


class EphysDataSet:
    """Analysis-facing wrapper around an electrophysiology data interface.

    Instances hold a data interface object (stored as ``self._data``) and
    expose its sweeps and their metadata. The constants below name the
    sweep-table columns and the clamp modes used throughout the class.
    """

    # Keys of the per-sweep metadata / column names of the sweep table.
    STIMULUS_UNITS = 'stimulus_units'
    STIMULUS_CODE = 'stimulus_code'
    STIMULUS_AMPLITUDE = 'stimulus_amplitude'
    STIMULUS_NAME = 'stimulus_name'
    SWEEP_NUMBER = 'sweep_number'
    CLAMP_MODE = 'clamp_mode'

    # All of the column names above, gathered in one list.
    COLUMN_NAMES = [STIMULUS_UNITS,
                    STIMULUS_CODE,
                    STIMULUS_AMPLITUDE,
                    STIMULUS_NAME,
                    CLAMP_MODE,
                    SWEEP_NUMBER,
                    ]

    # Clamp mode values recognised when mapping stimulus/response to
    # voltage/current.
    VOLTAGE_CLAMP = "VoltageClamp"
    CURRENT_CLAMP = "CurrentClamp"

    @property
    def ontology(self) -> StimulusOntology:
        """The stimulus ontology maps codified description of the stimulus type 
        to the human-readable descriptions.
        """
        return self._data.ontology

    @property
    def sweep_table(self) -> pd.DataFrame:
        """Each row of the sweep table contains the metadata for a single 
        sweep. In particular details of the stimulus presented and the clamp 
        mode. See EphysDataInterface.get_sweep_metadata for more information.

        """
        if not hasattr(self, "_sweep_table"):
            sweeps: List[Dict] = []
            for num in self._data.sweep_numbers:
                current = self._data.get_sweep_metadata(num)

                if self._sweep_info:
                    info = self._sweep_info.get(num, None)
                    if info is None:
                        continue
                    current.update(info)
                sweeps.append(current)

            self._sweep_table = pd.DataFrame(sweeps)
        return self._sweep_table

    @property
    def sweep_info(self):
        return list(self._sweep_info.values())

    @sweep_info.setter
    def sweep_info(self, value):
        if not isinstance(value, dict):
            self._sweep_info: Dict = {}
            for sweep in value:
                self._sweep_info[sweep["sweep_number"]] = sweep
        else:
            self._sweep_info = value
        
        if hasattr(self, "_sweep_table"):
            del self._sweep_table

    def __init__(
            self,
            data: EphysDataInterface,
            sweep_info: Optional[List[Dict]] = None
    ):
        """EphysDataSet is the preferred interface for running analyses or 
        pipeline code.

        Parameters
        ----------
        data : This object must implement the EphysDataInterface. It will 
            handle any loading of data from external sources (such as NWB2 
            files)
        """
        self._data: EphysDataInterface = data
        self.sweep_info = sweep_info or []

    def _setup_stimulus_repeat_lookup(self):
        """Each sweep contains the ith repetition of some stimulus (from 1 -> 
        the number of times that stimulus was presented). Find i for each 
        sweep.

        Notes
        -----
        see get_stim_code_ext for use

        """
        stimulus_counters = defaultdict(int)
        self._stimulus_repeat_lookup = {}

        for sweep_number in self._data.sweep_numbers:
            code = self.get_stimulus_code(sweep_number)
            stimulus_counters[code] += 1
            self._stimulus_repeat_lookup[sweep_number] = \
                stimulus_counters[code]

[docs]    def filtered_sweep_table(
            self,
            clamp_mode: Optional[str] = None,
            stimuli: Optional[Collection[str]] = None,
            stimuli_exclude: Optional[Collection[str]] = None,
    ) -> pd.DataFrame:
        """Utility for filtering the sweep table

        Parameters
        ----------
        clamp_mode: filter to one of self.VOLTAGE_CLAMP or self.CURRENT_CLAMP
        stimuli: filter to sweeps presenting these stimuli (codes)
        stimuli_exclude: filter to sweeps not presenting these stimuli

        Returns
        -------
        filtered sweep table
        """
        st = self.sweep_table

        if clamp_mode:
            mask = st[self.CLAMP_MODE] == clamp_mode
            st = st[mask.astype(bool)]

        if stimuli:
            mask = st[self.STIMULUS_CODE].apply(
                self.ontology.stimulus_has_any_tags, 
                args=(stimuli,), 
                tag_type="code"
            )
            st = st[mask.astype(bool)]

        if stimuli_exclude:
            mask = ~st[self.STIMULUS_CODE].apply(
                self.ontology.stimulus_has_any_tags, 
                args=(stimuli_exclude,), 
                tag_type="code"
            )
            st = st[mask.astype(bool)]

        return st

[docs]    def get_sweep_numbers(
            self,
            stimuli: Collection[str] = None,
            clamp_mode: Optional[str] = None
    ) -> List[int]:
        """Return the integer identifier of all sweeps matching argued criteria

        Parameters
        ----------
        stimuli : filter to  sweeps presenting these stimuli (codes)
        clamp_mode : filter to sweeps of this clamp mode

        Returns
        -------
        A list of sweep numbers matching these criteria
        """

        sweeps = self.filtered_sweep_table(
            clamp_mode=clamp_mode, stimuli=stimuli
        ).sort_values(by=self.SWEEP_NUMBER)

        if len(sweeps) == 0:
            raise IndexError(
                f"Cannot find {stimuli} sweeps with clamp mode: {clamp_mode} "
            )

        return sweeps[self.SWEEP_NUMBER].values.tolist()

[docs]    @deprecated("call .get_sweep_numbers()[-1] instead")
    def get_sweep_number(
            self,
            stimuli: Collection[str],
            clamp_mode: Optional[str] = None
    ) -> int:
        """Convenience for getting the integer identifier of the temporally 
        latest sweep matching argued criteria.

        Parameters
        ----------
        stimuli : filter to  sweeps presentingthese stimuli
        clamp_mode : filter to sweeps of this clamp mode

        Returns
        -------
        The identifier of the last sweep matching argued criteria
        """
        return self.get_sweep_numbers(stimuli, clamp_mode)[-1]

[docs]    def sweep(self, sweep_number: int) -> Sweep:
        """
        Create an instance of the Sweep class with the data loaded from the 
        from a file

        Parameters
        ----------
        sweep_number: int

        Returns
        -------
        sweep: Sweep object
        """

        sweep_data = self.get_sweep_data(sweep_number)
        sweep_metadata = self._data.get_sweep_metadata(sweep_number)

        time = np.arange(
            len(sweep_data["stimulus"])
        ) / sweep_data["sampling_rate"]

        voltage, current = type(self)._voltage_current(
            sweep_data["stimulus"],
            sweep_data["response"], 
            sweep_metadata["clamp_mode"], 
            enforce_equal_length=True,
        )

        try:
            sweep = Sweep(
                t=time,
                v=voltage,
                i=current,
                sampling_rate=sweep_data["sampling_rate"],
                sweep_number=sweep_number,
                clamp_mode=sweep_metadata["clamp_mode"],
                epochs=sweep_data.get("epochs", None),
            )

        except Exception:
            logging.warning("Error reading sweep %d" % sweep_number)
            raise

        return sweep

[docs]    def sweep_set(
            self, 
            sweep_numbers: Union[Sequence[int], int, None] = None
    ) -> SweepSet:
        """Construct a SweepSet object, which offers convenient access to an 
        ordered collection of sweeps.

        Parameters
        ----------
        sweep_numbers : Identifiers for the sweeps which will make up this set. 
            If None, use all available sweeps.

        Returns
        -------
        A SweepSet constructed from the requested sweeps
        """

        if sweep_numbers is None:
            _sweep_numbers: Sequence = self._data.sweep_numbers
        elif not hasattr(sweep_numbers, "__len__"):  # not testing for order
            _sweep_numbers = [sweep_numbers]
        else:
            _sweep_numbers = sweep_numbers  # type: ignore

        return SweepSet([self.sweep(num) for num in _sweep_numbers])

[docs]    def get_recording_date(self) -> str:
        """Return the date and time at which recording began.

        Returns
        -------
        a string, formatted like: "%Y-%m-%d %H:%M:%S" in local time
        """
        return (
            self._data.get_full_recording_date()
                .strftime("%Y-%m-%d %H:%M:%S")
        )

[docs]    def get_sweep_data(self, sweep_number: int) -> Dict:
        """Obtain the recorded data for a given sweep.

        Parameters
        ----------
        sweep_number : identifier for the sweep whose data will be returned

        Returns
        -------
        A dictionary containing at least:
            {
                'stimulus': np.ndarray,
                'response': np.ndarray,
                'stimulus_unit': string,
                'sampling_rate': float
            }
        """
        sweep_data = cp.copy(self._data.get_sweep_data(sweep_number))
        sweep_data["response"] = _nan_trailing_zeros(sweep_data["response"])

        return sweep_data

[docs]    def get_clamp_mode(self, sweep_number: int) -> str:
        """Obtain the clamp mode of a given sweep. Should be one of 
        EphysDataSet.VOLTAGE_CLAMP or EphysDataSet.CURRENT_CLAMP

        Parameters
        ----------
        sweep_number : identifier for the sweep whose clamp mode will be 
            returned

        Returns
        -------
        The clamp mode of the identified sweep
        """
        return self._data.get_sweep_metadata(sweep_number)["clamp_mode"]

[docs]    def get_stimulus_code(self, sweep_number: int) -> str:
        """Return the (short form) stimulus code for a particular sweep.

        Parameters
        ----------
        sweep_number : identifier for the sweep whose stimulus code will be 
            returned

        Returns
        -------
        code defining the stimulus presented on the identified sweep
        """
        return self._data.get_stimulus_code(sweep_number)

[docs]    def get_stimulus_code_ext(self, sweep_number: int) -> str:
        """Obtain the extended stimulus code for a sweep. This is the stimulus 
        code for that sweep augmented with an integer counter describing the 
        number of presentations of that stimulus up to and including the 
        requested sweep.

        Parameters
        ----------
        sweep_number : identifies the sweep whose extended stimulus code will 
        be returned

        Returns
        -------
        A string of the form "{stimulus_code}[{counter}]"
        """
        if not hasattr(self, "self._stimulus_repeat_lookup"):
            self._setup_stimulus_repeat_lookup()

        repeat = self._stimulus_repeat_lookup[sweep_number]
        code = self.get_stimulus_code(sweep_number)
        return f"{code}[{repeat}]"

[docs]    def get_stimulus_units(self, sweep_number: int) -> str:
        """Report the SI unit of measurement for a sweep's stimulus data

        Parameters
        ----------
        sweep_number : identifies the sweep whose stimulus unit will be 
            returned

        Returns
        -------
        An SI (or derived) unit's name
        """
        return self._data.get_sweep_metadata(sweep_number)["stimulus_units"]

    @classmethod
    def _voltage_current(
            cls,
            stimulus: np.ndarray,
            response: np.ndarray, 
            clamp_mode: str,
            enforce_equal_length: bool = True
    ) -> Tuple[np.array, np.array]:
        """Resolve the stimulus and response arrays from a sweep's data into 
        voltage and current, using the clamp mode as a guide

        Parameters
        ----------
        stimulus : stimulus trace
        response : response trace
        clamp_mode : Used to map stimulus and response to voltage and current
        enforce_equal_length : Raise a ValueError if the stimulus and 
            response arrays have uneven numbers of samples

        Returns
        -------
        The voltage and current traces.

        """            

        if clamp_mode == cls.VOLTAGE_CLAMP:
            voltage = stimulus
            current = response
        elif clamp_mode == cls.CURRENT_CLAMP:
            voltage = response
            current = stimulus
        else:
            raise ValueError(f"Invalid clamp mode: {clamp_mode}")

        if enforce_equal_length and len(voltage) != len(current):
            raise ValueError(
                f"found {len(voltage)} voltage samples, "
                f"but {len(current)} current samples"
            )

        return voltage, current

def _nan_trailing_zeros(
        array: np.ndarray, 
        inplace: bool = False
) -> np.ndarray:
    """If an array ends with one or more zeros, replace those zeros with 
    np.nan
    """

    if not inplace:
        array = array.copy()

    nonzero = np.flatnonzero(array)
    if len(nonzero) == 0 or nonzero[-1] + 1 >= len(array):
        return array

    array[nonzero[-1] + 1:] = np.nan
    return array
Source code for ipfx.dataset.ephys_data_set

Contents

Questions