Source code for galaxychop.models._gaussian_mixture

# This file is part of
# the galaxy-chop project (https://github.com/vcristiani/galaxy-chop)
# Copyright (c) 2021, Valeria Cristiani
# License: MIT
# Full Text: https://github.com/vcristiani/galaxy-chop/blob/master/LICENSE.txt

"""Gaussian Mixture Models."""


# =============================================================================
# IMPORTS
# =============================================================================

import joblib

import numpy as np

from sklearn import mixture

from ._base import DynamicStarsDecomposerMixin, GalaxyDecomposerABC, hparam
from ..utils import doc_inherit


# =============================================================================
# GAUSSIAN ABC
# =============================================================================
[docs]class DynamicStarsGaussianDecomposerABC(
    DynamicStarsDecomposerMixin, GalaxyDecomposerABC
):
    """Dynamic Stars Gaussian Decomposer Class.

    Parameters
    ----------
    covariance_type : {‘full’, ‘tied’, ‘diag’, ‘spherical’}, default="full"
        Parameter of :py:class:``GaussianMixture`` class into `scikit-learn`
        library.
    tol : float, default=0.001
        Parameter of :py:class:``GaussianMixture`` class into ``scikit-learn``
        library.
    reg_covar : float, default=1e-06
        Parameter of :py:class:``GaussianMixture`` class into ``scikit-learn``
        library.
    max_iter : float, default=100
        Parameter of :py:class:``GaussianMixture`` class into ``scikit-learn``
        library.
    n_init : int, default=10
        Parameter of :py:class:``GaussianMixture`` class into ``scikit-learn``
        library.
    init_params : {‘kmeans’, ‘random’}, default="kmeans"
        Parameter of :py:class:``GaussianMixture`` class into ``scikit-learn``
        library.
    weights_init : array-like of shape (n_components, ),  default=None
        Parameter of :py:class:``GaussianMixture`` class into ``scikit-learn``
        library.
    means_init : array-like of shape (n_components, n_features), default=None
        Parameter of :py:class:``GaussianMixture`` class into ``scikit-learn``
        library.
    precisions_init : array-like, default=None
        Parameter of :py:class:``GaussianMixture`` class into ``scikit-learn``
        library.
    random_state : int, default=None
        Parameter of :py:class:``GaussianMixture`` class into ``scikit-learn``
        library.
    warm_start : bool, default=False
        Parameter of :py:class:``GaussianMixture`` class into ``scikit-learn``
        library.
    verbose : int, default=0
        Parameter of :py:class:``GaussianMixture`` class into ``scikit-learn``
        library.
    verbose_interval : int, default=10
    """

    covariance_type = hparam(default="full")
    tol = hparam(default=0.001)
    reg_covar = hparam(default=1e-06)
    max_iter = hparam(default=100)
    n_init = hparam(default=10)
    init_params = hparam(default="kmeans")
    weights_init = hparam(default=None)
    means_init = hparam(default=None)
    precisions_init = hparam(default=None)
    random_state = hparam(default=None, converter=np.random.default_rng)
    warm_start = hparam(default=False)
    verbose = hparam(default=0)
    verbose_interval = hparam(default=10)

[docs]    @doc_inherit(GalaxyDecomposerABC.get_attributes)
    def get_attributes(self):
        """
        Notes
        -----
        In this model the parameter space is given by
            normalized_star_energy: normalized specific energy of the stars
            eps: circularity parameter (J_z/J_circ)
            eps_r: projected circularity parameter (J_p/J_circ).
        """
        return ["normalized_star_energy", "eps", "eps_r"]


# =============================================================================
# GMM
# =============================================================================
[docs]class GaussianMixture(DynamicStarsGaussianDecomposerABC):
    """GaussianMixture class.

    Implementation of the method for dynamically decomposing galaxies described
    by Obreja et al.(2018) [7]_ .

    Parameters
    ----------
    n_components: int, default=2
        The number of mixture components. Parameter of
        :py:class:``GaussianMixture`` class into ``scikit-learn`` library.
    **kwargs: key, value mappings
        Other optional keyword arguments are passed through to
        :py:class:``GaussianMixture`` class into ``scikit-learn`` library.

    Notes
    -----
    More information for ``GaussianMixture`` class:
        https://scikit-learn.org/stable/modules/generated/sklearn.mixture.GaussianMixture.html

    Examples
    --------
    Example of implementation of Gaussian Mixture Model.

    >>> import galaxychop as gchop
    >>> galaxy = gchop.read_hdf5(...)
    >>> galaxy = gchop.star_align(gchop.center(galaxy))
    >>> chopper = gchop.GaussianMixture()
    >>> chopper.decompose(galaxy)

    References
    ----------
    .. [7] Obreja, A., “Introducing galactic structure finder: the multiple
        stellar kinematic structures of a simulated Milky Way mass galaxy”,
        Monthly Notices of the Royal Astronomical Society, vol. 477, no. 4,
        pp. 4915-4930, 2018. doi:10.1093/mnras/sty1022.
        `<https://ui.adsabs.harvard.edu/abs/2018MNRAS.477.4915O/abstract>`_
    """

    n_components = hparam(default=2)

[docs]    @doc_inherit(GalaxyDecomposerABC.split)
    def split(self, X, y, attributes):
        """
        Notes
        -----
        The attributes used by the model are described in detail in the class
        documentation.
        """
        random_state = np.random.RandomState(self.random_state.bit_generator)

        gmm = mixture.GaussianMixture(
            n_components=self.n_components,
            covariance_type=self.covariance_type,
            tol=self.tol,
            reg_covar=self.reg_covar,
            max_iter=self.max_iter,
            n_init=self.n_init,
            init_params=self.init_params,
            weights_init=self.weights_init,
            means_init=self.means_init,
            precisions_init=self.precisions_init,
            random_state=random_state,
            warm_start=self.warm_start,
            verbose=self.verbose,
            verbose_interval=self.verbose_interval,
        )

        gmm_ = gmm.fit(X)
        labels = gmm_.predict(X)
        proba = gmm_.predict_proba(X)
        return labels, proba


# =============================================================================
# AUTO GMM
# =============================================================================


[docs]class AutoGaussianMixture(DynamicStarsGaussianDecomposerABC):
    """AutoGaussianMixture class.

    Implementation of the auto-gmm method for dynamically decomposing galaxies
    described by Du et al.(2019) [8]_ .

    Parameters
    ----------
    c_bic: float, default=0.1
        Cut value of the criteria for the automatic choice of the number of
        gaussians.
    n_jobs : int, default=None
    **kwargs: key, value mappings
        Other optional keyword arguments are passed through to
        :py:class:``GaussianMixture`` class into ``scikit-learn`` library.

    Notes
    -----
    Index of the cluster each stellar particles belongs to:
        Index of the cluster each stellar particles belongs to.
        Index=0: correspond to galaxy stellar halo.
        Index=1: correspond to galaxy bulge.
        Index=2: correspond to galaxy cold disk.
        Index=3: correspond to galaxy warm disk.

    Examples
    --------
    Example of implementation of auto-gmm model.

    >>> import galaxychop as gchop
    >>> galaxy = gchop.read_hdf5(...)
    >>> galaxy = gchop.star_align(gchop.center(galaxy))
    >>> chopper = gchop.AutoGaussianMixture()
    >>> chopper.decompose(galaxy)

    References
    ----------
    .. [8] Du, M., “Identifying Kinematic Structures in Simulated Galaxies
        Using Unsupervised Machine Learning”, The Astrophysical Journal,
        vol. 884, no. 2, 2019. doi:10.3847/1538-4357/ab43cc.
        `<https://ui.adsabs.harvard.edu/abs/2019ApJ...884..129D/abstract>`_
    """

    c_bic = hparam(default=0.1)
    n_jobs = hparam(default=None)

    _COMPONENTS_TO_TRY = np.arange(2, 16)

    def _run_gmm(self, X, n_components, random_state):
        gmm = mixture.GaussianMixture(
            n_components=n_components,
            covariance_type=self.covariance_type,
            tol=self.tol,
            reg_covar=self.reg_covar,
            max_iter=self.max_iter,
            n_init=self.n_init,
            init_params=self.init_params,
            weights_init=self.weights_init,
            means_init=self.means_init,
            precisions_init=self.precisions_init,
            random_state=random_state,
            warm_start=self.warm_start,
            verbose=self.verbose,
            verbose_interval=self.verbose_interval,
        )
        gmm.fit(X)
        return gmm

    def _try_components(self, X, n_components, random_state):
        gmm = self._run_gmm(X, n_components, random_state)
        return gmm.bic(X) / len(X)

[docs]    @doc_inherit(GalaxyDecomposerABC.split)
    def split(self, X, y, attributes):

        # for simplicity we conver the default_rng to a scikit-learn
        # compatible RandomState
        random_state = np.random.RandomState(self.random_state.bit_generator)

        # we copy self.components_to_try for simplicity
        ctt = self._COMPONENTS_TO_TRY

        # no we need multiple seed to creates a parrallel run of the GMM
        seeds = random_state.randint(np.iinfo(np.int32).max, size=len(ctt))

        with joblib.Parallel(
            n_jobs=self.n_jobs, verbose=self.verbose, prefer="processes"
        ) as P:
            # make the method delayed
            try_components = joblib.delayed(self._try_components)

            # excecute the trrys in parallel
            bic_med = P(
                try_components(X, n_components, seed)
                for n_components, seed in zip(ctt, seeds)
            )

        # convert bic_med to array
        bic_med = np.asarray(bic_med)

        # continue as normal
        bic_min = np.sum(bic_med[-5:]) / 5.0
        delta_bic_ = bic_med - bic_min

        # Criteria for the choice of the number of gaussians.
        mask = np.where(delta_bic_ <= self.c_bic)[0]

        # Number of components
        number_of_gaussians = np.min(ctt[mask])

        # Clustering with gaussian mixture and the parameters obtained.
        gcgmm_ = self._run_gmm(X, number_of_gaussians, random_state)

        n_components = gcgmm_.n_components
        center = gcgmm_.means_
        predict_proba = gcgmm_.predict_proba(X)

        # We add up the probabilities to obtain the classification of the
        # different particles.
        halo = np.zeros(len(X))
        bulge = np.zeros(len(X))
        cold_disk = np.zeros(len(X))
        warm_disk = np.zeros(len(X))

        for i in range(n_components):
            if center[i, 1] >= 0.85:
                cold_disk = cold_disk + predict_proba[:, i]
            if (center[i, 1] < 0.85) & (center[i, 1] >= 0.5):
                warm_disk = warm_disk + predict_proba[:, i]
            if (center[i, 1] < 0.5) & (center[i, 0] >= -0.75):
                halo = halo + predict_proba[:, i]
            if (center[i, 1] < 0.5) & (center[i, 0] < -0.75):
                bulge = bulge + predict_proba[:, i]

        probability = np.column_stack((halo, bulge, cold_disk, warm_disk))
        labels = probability.argmax(axis=1)

        return labels, probability