Source code for galaxychop.models._gaussian_mixture

# This file is part of
# the galaxy-chop project (https://github.com/vcristiani/galaxy-chop)
# Copyright (c) 2021, Valeria Cristiani
# License: MIT
# Full Text: https://github.com/vcristiani/galaxy-chop/blob/master/LICENSE.txt

"""Gaussian Mixture Models."""


# =============================================================================
# IMPORTS
# =============================================================================

import joblib

import numpy as np

from sklearn import mixture

from ._base import DynamicStarsDecomposerMixin, GalaxyDecomposerABC, hparam
from ..utils import doc_inherit


# =============================================================================
# GAUSSIAN ABC
# =============================================================================
[docs]class DynamicStarsGaussianDecomposerABC( DynamicStarsDecomposerMixin, GalaxyDecomposerABC ): """Dynamic Stars Gaussian Decomposer Class. Parameters ---------- covariance_type : {‘full’, ‘tied’, ‘diag’, ‘spherical’}, default="full" Parameter of :py:class:``GaussianMixture`` class into `scikit-learn` library. tol : float, default=0.001 Parameter of :py:class:``GaussianMixture`` class into ``scikit-learn`` library. reg_covar : float, default=1e-06 Parameter of :py:class:``GaussianMixture`` class into ``scikit-learn`` library. max_iter : float, default=100 Parameter of :py:class:``GaussianMixture`` class into ``scikit-learn`` library. n_init : int, default=10 Parameter of :py:class:``GaussianMixture`` class into ``scikit-learn`` library. init_params : {‘kmeans’, ‘random’}, default="kmeans" Parameter of :py:class:``GaussianMixture`` class into ``scikit-learn`` library. weights_init : array-like of shape (n_components, ), default=None Parameter of :py:class:``GaussianMixture`` class into ``scikit-learn`` library. means_init : array-like of shape (n_components, n_features), default=None Parameter of :py:class:``GaussianMixture`` class into ``scikit-learn`` library. precisions_init : array-like, default=None Parameter of :py:class:``GaussianMixture`` class into ``scikit-learn`` library. random_state : int, default=None Parameter of :py:class:``GaussianMixture`` class into ``scikit-learn`` library. warm_start : bool, default=False Parameter of :py:class:``GaussianMixture`` class into ``scikit-learn`` library. verbose : int, default=0 Parameter of :py:class:``GaussianMixture`` class into ``scikit-learn`` library. verbose_interval : int, default=10 """ covariance_type = hparam(default="full") tol = hparam(default=0.001) reg_covar = hparam(default=1e-06) max_iter = hparam(default=100) n_init = hparam(default=10) init_params = hparam(default="kmeans") weights_init = hparam(default=None) means_init = hparam(default=None) precisions_init = hparam(default=None) random_state = hparam(default=None, converter=np.random.default_rng) warm_start = hparam(default=False) verbose = hparam(default=0) verbose_interval = hparam(default=10)
[docs] @doc_inherit(GalaxyDecomposerABC.get_attributes) def get_attributes(self): """ Notes ----- In this model the parameter space is given by normalized_star_energy: normalized specific energy of the stars eps: circularity parameter (J_z/J_circ) eps_r: projected circularity parameter (J_p/J_circ). """ return ["normalized_star_energy", "eps", "eps_r"]
# ============================================================================= # GMM # =============================================================================
[docs]class GaussianMixture(DynamicStarsGaussianDecomposerABC): """GaussianMixture class. Implementation of the method for dynamically decomposing galaxies described by Obreja et al.(2018) [7]_ . Parameters ---------- n_components: int, default=2 The number of mixture components. Parameter of :py:class:``GaussianMixture`` class into ``scikit-learn`` library. **kwargs: key, value mappings Other optional keyword arguments are passed through to :py:class:``GaussianMixture`` class into ``scikit-learn`` library. Notes ----- More information for ``GaussianMixture`` class: https://scikit-learn.org/stable/modules/generated/sklearn.mixture.GaussianMixture.html Examples -------- Example of implementation of Gaussian Mixture Model. >>> import galaxychop as gchop >>> galaxy = gchop.read_hdf5(...) >>> galaxy = gchop.star_align(gchop.center(galaxy)) >>> chopper = gchop.GaussianMixture() >>> chopper.decompose(galaxy) References ---------- .. [7] Obreja, A., “Introducing galactic structure finder: the multiple stellar kinematic structures of a simulated Milky Way mass galaxy”, Monthly Notices of the Royal Astronomical Society, vol. 477, no. 4, pp. 4915-4930, 2018. doi:10.1093/mnras/sty1022. `<https://ui.adsabs.harvard.edu/abs/2018MNRAS.477.4915O/abstract>`_ """ n_components = hparam(default=2)
[docs] @doc_inherit(GalaxyDecomposerABC.split) def split(self, X, y, attributes): """ Notes ----- The attributes used by the model are described in detail in the class documentation. """ random_state = np.random.RandomState(self.random_state.bit_generator) gmm = mixture.GaussianMixture( n_components=self.n_components, covariance_type=self.covariance_type, tol=self.tol, reg_covar=self.reg_covar, max_iter=self.max_iter, n_init=self.n_init, init_params=self.init_params, weights_init=self.weights_init, means_init=self.means_init, precisions_init=self.precisions_init, random_state=random_state, warm_start=self.warm_start, verbose=self.verbose, verbose_interval=self.verbose_interval, ) gmm_ = gmm.fit(X) labels = gmm_.predict(X) proba = gmm_.predict_proba(X) return labels, proba
# ============================================================================= # AUTO GMM # =============================================================================
[docs]class AutoGaussianMixture(DynamicStarsGaussianDecomposerABC): """AutoGaussianMixture class. Implementation of the auto-gmm method for dynamically decomposing galaxies described by Du et al.(2019) [8]_ . Parameters ---------- c_bic: float, default=0.1 Cut value of the criteria for the automatic choice of the number of gaussians. n_jobs : int, default=None **kwargs: key, value mappings Other optional keyword arguments are passed through to :py:class:``GaussianMixture`` class into ``scikit-learn`` library. Notes ----- Index of the cluster each stellar particles belongs to: Index of the cluster each stellar particles belongs to. Index=0: correspond to galaxy stellar halo. Index=1: correspond to galaxy bulge. Index=2: correspond to galaxy cold disk. Index=3: correspond to galaxy warm disk. Examples -------- Example of implementation of auto-gmm model. >>> import galaxychop as gchop >>> galaxy = gchop.read_hdf5(...) >>> galaxy = gchop.star_align(gchop.center(galaxy)) >>> chopper = gchop.AutoGaussianMixture() >>> chopper.decompose(galaxy) References ---------- .. [8] Du, M., “Identifying Kinematic Structures in Simulated Galaxies Using Unsupervised Machine Learning”, The Astrophysical Journal, vol. 884, no. 2, 2019. doi:10.3847/1538-4357/ab43cc. `<https://ui.adsabs.harvard.edu/abs/2019ApJ...884..129D/abstract>`_ """ c_bic = hparam(default=0.1) n_jobs = hparam(default=None) _COMPONENTS_TO_TRY = np.arange(2, 16) def _run_gmm(self, X, n_components, random_state): gmm = mixture.GaussianMixture( n_components=n_components, covariance_type=self.covariance_type, tol=self.tol, reg_covar=self.reg_covar, max_iter=self.max_iter, n_init=self.n_init, init_params=self.init_params, weights_init=self.weights_init, means_init=self.means_init, precisions_init=self.precisions_init, random_state=random_state, warm_start=self.warm_start, verbose=self.verbose, verbose_interval=self.verbose_interval, ) gmm.fit(X) return gmm def _try_components(self, X, n_components, random_state): gmm = self._run_gmm(X, n_components, random_state) return gmm.bic(X) / len(X)
[docs] @doc_inherit(GalaxyDecomposerABC.split) def split(self, X, y, attributes): # for simplicity we conver the default_rng to a scikit-learn # compatible RandomState random_state = np.random.RandomState(self.random_state.bit_generator) # we copy self.components_to_try for simplicity ctt = self._COMPONENTS_TO_TRY # no we need multiple seed to creates a parrallel run of the GMM seeds = random_state.randint(np.iinfo(np.int32).max, size=len(ctt)) with joblib.Parallel( n_jobs=self.n_jobs, verbose=self.verbose, prefer="processes" ) as P: # make the method delayed try_components = joblib.delayed(self._try_components) # excecute the trrys in parallel bic_med = P( try_components(X, n_components, seed) for n_components, seed in zip(ctt, seeds) ) # convert bic_med to array bic_med = np.asarray(bic_med) # continue as normal bic_min = np.sum(bic_med[-5:]) / 5.0 delta_bic_ = bic_med - bic_min # Criteria for the choice of the number of gaussians. mask = np.where(delta_bic_ <= self.c_bic)[0] # Number of components number_of_gaussians = np.min(ctt[mask]) # Clustering with gaussian mixture and the parameters obtained. gcgmm_ = self._run_gmm(X, number_of_gaussians, random_state) n_components = gcgmm_.n_components center = gcgmm_.means_ predict_proba = gcgmm_.predict_proba(X) # We add up the probabilities to obtain the classification of the # different particles. halo = np.zeros(len(X)) bulge = np.zeros(len(X)) cold_disk = np.zeros(len(X)) warm_disk = np.zeros(len(X)) for i in range(n_components): if center[i, 1] >= 0.85: cold_disk = cold_disk + predict_proba[:, i] if (center[i, 1] < 0.85) & (center[i, 1] >= 0.5): warm_disk = warm_disk + predict_proba[:, i] if (center[i, 1] < 0.5) & (center[i, 0] >= -0.75): halo = halo + predict_proba[:, i] if (center[i, 1] < 0.5) & (center[i, 0] < -0.75): bulge = bulge + predict_proba[:, i] probability = np.column_stack((halo, bulge, cold_disk, warm_disk)) labels = probability.argmax(axis=1) return labels, probability