Best Python code snippet using lisa_python
RE_obj_callable.py
Source: RE_obj_callable.py
"""
Random element objects.
"""
# TODO: docstrings?
import math

import matplotlib.pyplot as plt
import numpy as np
from scipy.special import betaln, gammaln, xlog1py, xlogy
from scipy.stats._multivariate import multi_rv_generic

from thesis._deprecated.func_obj import FiniteDomainFunc
from thesis.util.generic import check_data_shape, check_valid_pmf
from thesis.util.math import simplex_round


# %% Base RE classes

class BaseRE(multi_rv_generic):
    """
    Base class for generic random element objects.

    Subclasses implement `_rvs`; callers use `rvs`, which normalizes the
    `size` argument and resolves the random state.
    """

    def __init__(self, rng=None):
        super().__init__(rng)  # may be None or int for legacy numpy rng
        self._data_shape = None
        self._mode = None

    @property
    def data_shape(self):
        """Shape of a single realization."""
        return self._data_shape

    @property
    def mode(self):
        """Most probable realization (set by subclasses in `_update_attr`)."""
        return self._mode

    def rvs(self, size=(), random_state=None):
        """Draw realizations; `size` is an int or a tuple of sample dims."""
        if type(size) is int:
            size = (size,)
        elif type(size) is not tuple:
            raise TypeError("Input 'size' must be int or tuple.")
        random_state = self._get_random_state(random_state)
        return self._rvs(size, random_state)

    def _rvs(self, size=(), random_state=None):
        raise NotImplementedError("Method must be overwritten.")


class BaseRV(BaseRE):
    """
    Base class for generic random variable (numeric) objects.
    """

    def __init__(self, rng=None):
        super().__init__(rng)
        self._mean = None
        self._cov = None

    @property
    def mean(self):
        return self._mean

    @property
    def cov(self):
        return self._cov


class DiscreteRE(BaseRE):
    """
    Base class for discrete random element objects.
    """

    def pf(self, x):
        return self.pmf(x)

    def pmf(self, x):
        """Evaluate the PMF at `x`, preserving the leading set shape."""
        x, set_shape = check_data_shape(x, self._data_shape)
        return self._pmf(x).reshape(set_shape)

    def _pmf(self, x):
        _out = []
        for x_i in x.reshape((-1,) + self._data_shape):
            _out.append(self._pmf_single(x_i))
        return np.asarray(_out)  # returned array may be flattened over 'set_shape'

    def _pmf_single(self, x):
        raise NotImplementedError("Method must be overwritten.")


class DiscreteRV(DiscreteRE, BaseRV):
    """
    Base class for discrete random variable (numeric) objects.
    """


class ContinuousRV(BaseRV):
    """
    Base class for continuous random element objects.
    """

    def pf(self, x):
        return self.pdf(x)

    def pdf(self, x):
        """Evaluate the PDF at `x`, preserving the leading set shape."""
        x, set_shape = check_data_shape(x, self._data_shape)
        return self._pdf(x).reshape(set_shape)

    def _pdf(self, x):
        _out = []
        for x_i in x.reshape((-1,) + self._data_shape):
            _out.append(self._pdf_single(x_i))
        return np.asarray(_out)  # returned array may be flattened

    def _pdf_single(self, x):
        raise NotImplementedError("Method must be overwritten.")


# %% Specific RE's

class FiniteRE(DiscreteRE):
    """
    Generic RE drawn from a finite support set using an explicitly defined PMF.

    NOTE(review): `self.pmf = pmf` deliberately shadows `DiscreteRE.pmf` with
    the FiniteDomainFunc instance; the PMF is evaluated by calling that object.
    """

    def __new__(cls, pmf, rng=None):  # TODO: function type check
        # Numeric support -> dispatch to the RV subclass instead.
        if np.issubdtype(pmf.supp.dtype, np.number):
            return super().__new__(FiniteRV)
        else:
            return super().__new__(cls)

    def __init__(self, pmf, rng=None):
        super().__init__(rng)
        self.pmf = pmf
        self._update_attr()

    @classmethod
    def gen_func(cls, supp, p, rng=None):
        """Alternate constructor from explicit support and probability arrays."""
        p = np.asarray(p)
        pmf = FiniteDomainFunc(supp, p)
        return cls(pmf, rng)

    # Input properties
    @property
    def supp(self):
        return self._supp

    @property
    def p(self):
        return self._p

    @p.setter  # TODO: pmf setter? or just p?
    def p(self, p):
        self.pmf.val = p
        self._update_attr()

    # Attribute Updates
    def _update_attr(self):
        self._supp = self.pmf.supp
        self._p = check_valid_pmf(self.pmf(self._supp))
        self._data_shape = self.pmf.data_shape_x
        self._supp_flat = self.pmf._supp_flat
        self._mode = self.pmf.argmax

    def _rvs(self, size=(), random_state=None):
        # Sample flat indices by probability, then reshape to data shape.
        i = random_state.choice(self._p.size, size, p=self._p.flatten())
        return self._supp_flat[i].reshape(size + self._data_shape)

    def plot_pmf(self, ax=None):
        self.pmf.plot(ax)


class FiniteRV(FiniteRE, DiscreteRV):
    """
    Generic RV drawn from a finite support set using an explicitly defined PMF.
    """

    def _update_attr(self):
        super()._update_attr()
        self._mean = self.pmf.m1
        self._cov = self.pmf.m2c


def _dirichlet_check_alpha_0(alpha_0):
    """Validate the Dirichlet concentration parameter (positive scalar)."""
    alpha_0 = float(alpha_0)
    if alpha_0 <= 0:
        raise ValueError("Concentration parameter must be a positive scalar.")
    return alpha_0


def _check_func_pmf(f, full_support=False):
    """Validate that `f` is a scalar function usable as a (mean) PMF."""
    if f.data_shape_y != ():
        raise ValueError("Must be scalar function.")
    if full_support and f.min <= 0:
        raise ValueError("Function range must be positive real.")
    if not full_support and f.min < 0:
        raise ValueError("Function range must be non-negative real.")
    return f


def _dirichlet_check_input(x, alpha_0, mean):
    """Validate a Dirichlet PDF argument against the mean function."""
    if not isinstance(x, type(mean)):
        raise TypeError("Input must have same function type as mean.")
    if np.logical_and(x.val == 0, mean.val < 1 / alpha_0).any():
        raise ValueError(
            "Each element in 'x' must be greater than "
            "zero if the corresponding mean element is less than 1 / alpha_0."
        )
    return x


class DirichletRV(ContinuousRV):
    """
    Dirichlet random process, finite-supp realizations.
    """

    def __init__(self, alpha_0, mean, rng=None):
        super().__init__(rng)
        self._alpha_0 = _dirichlet_check_alpha_0(alpha_0)
        self._mean = _check_func_pmf(mean, full_support=True)
        self._update_attr()

    @classmethod
    def gen_func(cls, alpha_0, supp, p, rng=None):
        """Alternate constructor from explicit support and probability arrays."""
        p = np.asarray(p)
        mean = FiniteDomainFunc(supp, p)
        return cls(alpha_0, mean, rng)

    # Input properties
    @property
    def alpha_0(self):
        return self._alpha_0

    @alpha_0.setter
    def alpha_0(self, alpha_0):
        self._alpha_0 = _dirichlet_check_alpha_0(alpha_0)
        self._update_attr()

    @property
    def mean(self):
        return self._mean

    @mean.setter
    def mean(self, mean):
        self._mean = _check_func_pmf(mean, full_support=True)
        self._update_attr()

    # Attribute Updates
    def _update_attr(self):
        self._data_shape = self._mean.set_shape
        self._data_size = math.prod(self._data_shape)
        if self._mean.min > 1 / self._alpha_0:
            self._mode = (self._mean - 1 / self._alpha_0) / (1 - self._data_size / self._alpha_0)
        else:
            # warnings.warn("Mode method currently supported for mean > 1/alpha_0 only")
            self._mode = None  # TODO: complete with general formula
        # TODO: IMPLEMENT COV
        self._log_pdf_coef = gammaln(self._alpha_0) - np.sum(
            gammaln(self._alpha_0 * self._mean.val)
        )

    def _rvs(self, size=(), random_state=None):
        vals = random_state.dirichlet(self._alpha_0 * self._mean.val.flatten(), size).reshape(
            size + self._data_shape
        )
        if size == ():
            return FiniteDomainFunc(self.mean.supp, vals)
        else:
            return [FiniteDomainFunc(self.mean.supp, val) for val in vals]

    def pdf(self, x):  # overwrites base methods...
        x = _dirichlet_check_input(x, self._alpha_0, self._mean)
        log_pdf = self._log_pdf_coef + np.sum(
            xlogy(self._alpha_0 * self._mean.val - 1, x.val).reshape(-1, self._data_size),
            -1,
        )
        return np.exp(log_pdf)

    # TODO: plot_pdf via simplex scatter for 2-/3-dimensional data
    # (commented-out draft removed; see version control history)


def _empirical_check_n(n):
    """Validate the empirical sample count (positive int)."""
    if not isinstance(n, int) or n < 1:
        raise ValueError("Input 'n' must be a positive integer.")
    return n


def _empirical_check_input(x, n, mean):
    """Validate an empirical PMF argument: entries must be multiples of 1/n."""
    if not isinstance(x, type(mean)):
        raise TypeError("Input must have same function type as mean.")
    if (np.minimum((n * x.val) % 1, (-n * x.val) % 1) > 1e-9).any():
        raise ValueError("Each entry in 'x' must be a multiple of 1/n.")
    return x


class EmpiricalRV(DiscreteRV):
    """
    Empirical random process, finite-supp realizations.
    """

    def __init__(self, n, mean, rng=None):
        super().__init__(rng)
        self._n = _empirical_check_n(n)
        self._mean = _check_func_pmf(mean, full_support=False)
        self._update_attr()

    @classmethod
    def gen_func(cls, n, supp, p, rng=None):
        """Alternate constructor from explicit support and probability arrays."""
        p = np.asarray(p)
        mean = FiniteDomainFunc(supp, p)
        return cls(n, mean, rng)

    # Input properties
    @property
    def n(self):
        return self._n

    @n.setter
    def n(self, n):
        self._n = _empirical_check_n(n)
        self._update_attr()

    @property
    def mean(self):
        return self._mean

    @mean.setter
    def mean(self, mean):
        # FIX: was full_support=True, inconsistent with __init__ (False).
        self._mean = _check_func_pmf(mean, full_support=False)
        self._update_attr()

    # Attribute Updates
    def _update_attr(self):
        self._data_shape = self._mean.set_shape
        self._data_size = self._mean.size
        self._mode = ((self._n * self._mean) // 1) + FiniteDomainFunc(
            self._mean.supp, simplex_round((self._n * self._mean.val) % 1)
        )
        # TODO: IMPLEMENT COV
        self._log_pmf_coef = gammaln(self._n + 1)

    def _rvs(self, size=(), random_state=None):
        vals = random_state.multinomial(self._n, self._mean.val.flatten(), size).reshape(
            size + self._data_shape
        )
        if size == ():
            return FiniteDomainFunc(self.mean.supp, vals)
        else:
            return [FiniteDomainFunc(self.mean.supp, val) for val in vals]

    def pmf(self, x):
        x = _empirical_check_input(x, self._n, self._mean)
        log_pmf = self._log_pmf_coef + (
            xlogy(self._n * x.val, self._mean.val) - gammaln(self._n * x.val + 1)
        ).reshape(-1, self._data_size).sum(axis=-1)
        return np.exp(log_pmf)

    # TODO: plot_pmf via simplex scatter for 2-/3-dimensional data
    # (commented-out draft removed; see version control history)


class DirichletEmpiricalRV(DiscreteRV):
    """
    Dirichlet-Empirical random process, finite-supp realizations.
    """

    def __init__(self, n, alpha_0, mean, rng=None):
        super().__init__(rng)
        self._n = _empirical_check_n(n)
        self._alpha_0 = _dirichlet_check_alpha_0(alpha_0)
        self._mean = _check_func_pmf(mean, full_support=False)
        self._update_attr()

    # Input properties
    @property
    def n(self):
        return self._n

    @n.setter
    def n(self, n):
        self._n = _empirical_check_n(n)
        self._update_attr()

    @property
    def alpha_0(self):
        return self._alpha_0

    @alpha_0.setter
    def alpha_0(self, alpha_0):
        self._alpha_0 = _dirichlet_check_alpha_0(alpha_0)
        self._update_attr()

    @property
    def mean(self):
        return self._mean

    @mean.setter
    def mean(self, mean):
        # FIX: was full_support=True, inconsistent with __init__ (False).
        self._mean = _check_func_pmf(mean, full_support=False)
        self._update_attr()

    # Attribute Updates
    def _update_attr(self):
        # FIX: was `self._mean.shape`; sibling classes (DirichletRV,
        # EmpiricalRV) use `set_shape` -- TODO confirm against FiniteDomainFunc.
        self._data_shape = self._mean.set_shape
        self._data_size = self._mean.size
        # TODO: mode? cov?
        self._log_pmf_coef = (
            gammaln(self._alpha_0)
            - np.sum(gammaln(self._alpha_0 * self._mean.val))
            + gammaln(self._n + 1)
            - gammaln(self._alpha_0 + self._n)
        )

    def _rvs(self, size=(), random_state=None):
        raise NotImplementedError

    def pmf(self, x):
        x = _empirical_check_input(x, self._n, self._mean)
        # FIX: was `self._n * x` (the function object); use `x.val` as in
        # EmpiricalRV.pmf and _empirical_check_input.
        log_pmf = self._log_pmf_coef + (
            gammaln(self._alpha_0 * self._mean.val + self._n * x.val)
            - gammaln(self._n * x.val + 1)
        ).reshape(-1, self._data_size).sum(axis=-1)
        return np.exp(log_pmf)

    # TODO: plot_pmf -- reused code; define simplex plot_xy outside!


class EmpiricalRP(DiscreteRV):  # CONTINUOUS
    """
    Empirical random process, continuous support.
    """

    def __init__(self, n, mean, rng=None):
        super().__init__(rng)
        self._n = _empirical_check_n(n)
        if not isinstance(mean, BaseRE):
            raise TypeError("Mean input must be an RE object.")
        self._mean = mean
        self._update_attr()

    # Input properties
    @property
    def n(self):
        return self._n

    @n.setter
    def n(self, n):
        self._n = _empirical_check_n(n)
        self._update_attr()

    @property
    def mean(self):
        return self._mean

    @mean.setter
    def mean(self, mean):
        if not isinstance(mean, BaseRE):
            raise TypeError("Mean input must be an RE object.")
        self._mean = mean
        self._update_attr()

    # Attribute Updates
    def _update_attr(self):
        self._data_shape = self._mean.data_shape
        # TODO: mode, cov, pmf coefficient (see EmpiricalRV)

    def _rvs(self, size=(), random_state=None):
        # FIXME: continuous-support sampling not implemented
        # (dead finite-support draft after the raise removed).
        raise NotImplementedError


class SampsDE(BaseRE):
    """
    FAKE samples from continuous DP realization
    """

    def __init__(self, n, alpha_0, mean, rng=None):
        super().__init__(rng)
        self._n = _empirical_check_n(n)
        self._alpha_0 = _dirichlet_check_alpha_0(alpha_0)
        if not isinstance(mean, BaseRE):
            raise TypeError("Mean input must be an RE object.")
        self._mean = mean
        self._data_shape = mean.data_shape

    # Input properties
    @property
    def n(self):
        return self._n

    @property
    def alpha_0(self):
        return self._alpha_0

    @property
    def mean(self):
        return self._mean

    def _rvs(self, size=(), random_state=None):
        if size != ():
            raise ValueError("Size input not used, 'n' is.")
        emp = []  # list of [sample, count] pairs
        for n in range(self.n):
            # Probability of drawing a fresh sample from the mean distribution
            # decays as the empirical distribution accumulates mass.
            p_mean = 1 / (1 + n / self.alpha_0)
            if random_state.choice([True, False], p=[p_mean, 1 - p_mean]):
                # Sample from mean dist
                emp.append([self.mean.rvs(), 1])
            else:
                # Sample from empirical dist
                cnts = [s[1] for s in emp]
                probs = np.array(cnts) / sum(cnts)
                i = random_state.choice(range(len(emp)), p=probs)
                emp[i][1] += 1
        out = [np.broadcast_to(s, (c, *self.data_shape)) for s, c in emp]
        return np.concatenate(out)


class BetaRV(ContinuousRV):
    """
    Beta random variable.
    """

    def __init__(self, a, b, rng=None):
        super().__init__(rng)
        if a <= 0 or b <= 0:
            raise ValueError("Parameters must be strictly positive.")
        self._a, self._b = a, b
        self._data_shape = ()
        self._update_attr()

    # Input properties
    @property
    def a(self):
        return self._a

    @a.setter
    def a(self, a):
        if a <= 0:
            raise ValueError
        self._a = a
        self._update_attr()

    @property
    def b(self):
        return self._b

    @b.setter
    def b(self, b):
        if b <= 0:
            raise ValueError
        self._b = b
        self._update_attr()

    # Attribute Updates
    def _update_attr(self):
        if self._a > 1:
            if self._b > 1:
                self._mode = (self._a - 1) / (self._a + self._b - 2)
            else:
                self._mode = 1
        elif self._a <= 1:
            if self._b > 1:
                self._mode = 0
            elif self._a == 1 and self._b == 1:
                self._mode = 0  # any in unit interval
            else:
                self._mode = 0  # any in {0,1}
        self._mean = self._a / (self._a + self._b)
        self._cov = self._a * self._b / (self._a + self._b) ** 2 / (self._a + self._b + 1)

    def _rvs(self, size=(), random_state=None):
        return random_state.beta(self._a, self._b, size)

    def _pdf(self, x):
        log_pdf = xlog1py(self._b - 1.0, -x) + xlogy(self._a - 1.0, x) - betaln(self._a, self._b)
        return np.exp(log_pdf)

    def plot_pdf(self, n_plt, ax=None):
        if ax is None:
            _, ax = plt.subplots()
            ax.set(xlabel="$x$", ylabel="$P_{\mathrm{x}}(x)$")
        x_plt = np.linspace(0, 1, n_plt + 1, endpoint=True)
        plt_data = ax.plot(x_plt, self.pdf(x_plt))
        # NOTE(review): source truncated here; returning the artists matches
        # the plot-method convention elsewhere in this module -- confirm.
        return plt_data
columns_config.py
Source: columns_config.py
# Column preprocessing configuration.
#
# Each entry maps a survey column to its cleaning spec:
#   'conversion.values'   -- value remapping ('_NaN' = missing; '_mean' = impute mean)
#   'conversion.astype'   -- dtype cast applied after remapping
#   'one_hot_encoding'    -- categorical column, one-hot encode downstream
#   'continuous'          -- numeric column, keep as-is
#   'is_target_variable'  -- prediction target
#   'available_types'     -- accepted input dtypes
#
# NOTE(review): the source was truncated after 'IPAQTOTAL'; entries below
# cover everything visible. The original repeated a handful of patterns
# dozens of times; the factory helpers below build fresh, value-identical
# dicts for each key (no shared/aliased sub-dicts).
# FIX: 'q51' originally lacked the 'values' nesting level used by every
# other entry ('conversion': {'_NaN': 94}); it is normalized here.

def _one_hot(values=None, astype=None):
    """Config for a one-hot-encoded column; default maps '_NaN' to code 94."""
    conversion = {'values': {'_NaN': 94} if values is None else values}
    if astype is not None:
        conversion['astype'] = astype
    return {'conversion': conversion, 'one_hot_encoding': True}


def _continuous(values, astype=None, **extra):
    """Config for a continuous column from a value-remapping dict."""
    conversion = {'values': values}
    if astype is not None:
        conversion['astype'] = astype
    return {'conversion': conversion, 'continuous': True, **extra}


_TYPES = ['int', 'float', 'float64']

columns_config = {
    'qs1': _continuous({'_NaN': '_mean'}),
    'qs2': _one_hot(),
    'qs3': _one_hot(),
    'NORTHEAST': {'conversion': {'values': {'_NaN': 0}}},
    'MIDWEST': {'conversion': {'values': {'_NaN': 0}}},
    'SOUTH': {'conversion': {'values': {'_NaN': 0}}},
    'WEST': {'conversion': {'values': {'_NaN': 0}}},
    'Q1VALUE': _continuous({9998: '_mean', 9999: '_mean'}),
    'Q2VALUE': _continuous({9998: '_mean', 9999: '_mean'}),
    'Q2Q1DIF': _continuous({'_NaN': '_mean', 9998: '_mean', 9999: '_mean'}),
    'Q3VALUE': _continuous({9998: '_mean', 9999: '_mean'}),
    'Q4VALUE': _continuous({9998: '_mean', 9999: '_mean'}),
    'Q4Q3DIF': _continuous({'_NaN': '_mean', 9998: '_mean', 9999: '_mean'},
                           available_types=list(_TYPES)),
    'Q3Q1DIF': {'conversion': {}, 'continuous': True},
    'Q4Q2DIF': {'conversion': {}, 'continuous': True},
    'Q4Q2DIFQ3Q1DIFTOTAL': {'conversion': {}, 'continuous': True},
    'Q5': _continuous({'_NaN': '_mean'}),
    'Q6': _continuous({'_NaN': '_mean'}),
    'Q6Q5DIF': _continuous({'_NaN': '_mean'}),
    'q7': _one_hot(),
    'q8': _one_hot(astype='int64'),
    'q9': _one_hot(),
    'q10': _one_hot(astype='int64'),
    'q11': _one_hot(),
    'q12': _one_hot(),
    **{k: _one_hot() for k in ('q13a', 'q13b', 'q13c', 'q13d', 'q13e', 'q13f', 'q13g')},
    'EPWORTH': _continuous({'_NaN': '_mean'}),
    **{k: _one_hot() for k in ('q14', 'q15', 'q16a', 'q16c', 'q16d', 'q16e', 'q16f',
                               'q17', 'q18', 'q19a', 'q19b', 'q19c', 'q19d',
                               'q20', 'q21', 'q22', 'q23', 'q24', 'q25', 'q26')},
    'q27': _one_hot(astype='int64'),
    'q28': _one_hot(astype='int64'),
    'q29a': _continuous({'_NaN': '_mean', 98: '_mean', 99: '_mean', 97: 0.5},
                        astype='int64'),
    'q29b': _continuous({'_NaN': '_mean', 98: '_mean', 99: '_mean', 97: 0.5},
                        astype='int64'),
    'q29c': _continuous({'_NaN': '_mean', 98: '_mean', 99: '_mean', 97: 0.5},
                        astype='int64'),
    'Q29TOTAL': _continuous({98: '_mean', 99: '_mean'}),
    # Target variable: reverses the 1-4 response scale.
    'q30': {'conversion': {'values': {1: 4, 2: 3, 3: 2, 4: 1}},
            'is_target_variable': True},
    **{k: _one_hot() for k in ('q31', 'q32', 'q33', 'q34')},
    'q35': _continuous({'_NaN': '_mean', 996: '_mean', 998: '_mean', 999: '_mean'},
                       available_types=list(_TYPES)),
    'Q36': _continuous({98: '_mean', 99: '_mean', '_NaN': '_mean'}),
    **{k: _one_hot(astype='int64') for k in ('q3701', 'q3702', 'q3703')},
    'Q38': _continuous({98: '_mean', 99: '_mean', '_NaN': '_mean'}),
    **{k: _one_hot(astype='int64') for k in ('q3901', 'q3902', 'q3903')},
    'Q40': _continuous({98: '_mean', 99: '_mean', '_NaN': '_mean'}),
    **{k: _one_hot(astype='int64') for k in ('q4101', 'q4102', 'q4103')},
    'Q36Q38Q40TOTAL': {},
    'Q42': _continuous({98: '_mean', 99: '_mean', '_NaN': '_mean'}),
    **{k: _continuous({98: '_mean', 99: '_mean', '_NaN': '_mean'})
       for k in ('Q43A', 'Q43B', 'Q43C', 'Q43D', 'Q43E', 'Q43F')},
    'Q43G1': _continuous({98: '_mean', 99: '_mean', '_NaN': '_mean'},
                         available_types=list(_TYPES)),
    'Q43G2': _continuous({98: '_mean', 99: '_mean', '_NaN': '_mean'}),
    'Q43G3': _continuous({98: '_mean', 99: '_mean', '_NaN': '_mean'}),
    'Q43TOTAL': _continuous({98: '_mean', 99: '_mean', '_NaN': '_mean'}),
    **{k: _one_hot(astype='int64') for k in ('q4401', 'q4402', 'q4403')},
    **{k: _one_hot() for k in ('q45', 'q46', 'q47', 'q48', 'q49', 'q50')},
    'q51': _one_hot(),  # FIX: normalized to the standard 'values' nesting
    **{k: _one_hot() for k in ('q53', 'q54', 'q55', 'q56')},
    'q5701': _one_hot(values={'_NaN': 94, 9: 94}, astype='int64'),
    'q5702': _one_hot(astype='int64'),
    'q5703': _one_hot(astype='int64'),
    'q5704': {'conversion': {'values': {'_NaN': 94}}},
    'q58': {'conversion': {'values': {'_NaN': 94}}},
    **{k: _continuous({'_NaN': '_mean'})
       for k in ('SHEEWORK', 'SHEEFAMILY', 'SHEESOCIAL', 'SHEEMOOD', 'SHEESEX',
                 'SHEETOTAL', 'NSFDISABLE', 'WEIGHT', 'HEIGHT', 'BMI')},
    'STOPBAG1': {'conversion': {'values': {'_NaN': 0}}},
    'STOPBAG2': _continuous({'_NaN': '_mean'}),
    'IPAQ36': _continuous({'_NaN': '_mean', 98: '_mean', 99: '_mean'}),
    'IPAQ38': _continuous({'_NaN': '_mean', 98: '_mean', 99: '_mean'}),
    'IPAQ40': _continuous({'_NaN': '_mean', 98: '_mean', 99: '_mean'}),
    # NOTE(review): '_NaN': 94 here (vs '_mean' in other continuous columns)
    # is preserved from the source -- confirm it is intentional.
    'IPAQTOTAL': _continuous({'_NaN': 94, 98: '_mean', 99: '_mean'}),
}
normal.py
Source:normal.py
1import numpy as np2import paddle3import paddle.fluid as fluid4from .base import Distribution5__all__ = [6 'Normal',7]8class Normal(Distribution):9 def __init__(self,10 dtype='float32',11 param_dtype='float32',12 is_continues=True,13 is_reparameterized=True,14 group_ndims=0,15 **kwargs):16 super(Normal, self).__init__(dtype, 17 param_dtype, 18 is_continues,19 is_reparameterized,20 group_ndims=group_ndims,21 **kwargs)22 try:23 self._std = paddle.cast(paddle.to_tensor([kwargs['std']]), self.dtype) \24 if type(kwargs['std']) in [type(1.), type(1)] else kwargs['std']25 self._logstd = paddle.log(self._std)26 except:27 self._logstd = paddle.cast(paddle.to_tensor([kwargs['logstd']]), self.dtype) \28 if type(kwargs['logstd']) in [type(1.), type(1)] else kwargs['logstd']29 self._std = paddle.exp(self._logstd)30 self._mean = kwargs['mean']31 @property32 def mean(self):33 """The mean of the Normal distribution."""34 return self._mean35 @property36 def logstd(self):37 """The log standard deviation of the Normal distribution."""38 try:39 return self._logstd40 except:41 self._logstd = paddle.log(self._std)42 return self._logstd43 @property44 def std(self):45 """The standard deviation of the Normal distribution."""46 return self._std47 def _sample(self, n_samples=1, **kwargs):48 if n_samples > 1:49 _shape = fluid.layers.shape(self._mean)50 _shape = fluid.layers.concat([paddle.to_tensor([n_samples], dtype="int32"), _shape])51 _len = len(self._std.shape)52 _std = paddle.tile(self._std, repeat_times=[n_samples, *_len*[1]])53 _mean = paddle.tile(self._mean, repeat_times=[n_samples, *_len*[1]])54 else:55 _shape = fluid.layers.shape(self._mean)56 _std = self._std + 0.57 _mean = self._mean + 0.58 if self.is_reparameterized:59 epsilon = paddle.normal(name='sample',60 shape=_shape,61 mean=0.0,62 std=1.0)63 sample_ = _mean + _std * epsilon64 else:65 _mean.stop_gradient = True66 _std.stop_gradient = True67 epsilon = paddle.normal(name='sample',68 shape=_shape,69 mean=0.0,70 std=1.0)71 
sample_ = _mean + _std * epsilon72 sample_.stop_gradient = False73 self.sample_cache = sample_74 if n_samples > 1:75 assert(sample_.shape[0] == n_samples)76 return sample_77 def _log_prob(self, sample=None):78 if sample is None:79 sample = self.sample_cache80 if len(sample.shape) > len(self._mean.shape):81 n_samples = sample.shape[0]82 _len = len(self._std.shape)83 _std = paddle.tile(self._std, repeat_times=[n_samples, *_len*[1]]) 84 _mean = paddle.tile(self._mean, repeat_times=[n_samples, *_len*[1]]) 85 else:86 _std = self._std87 _mean = self._mean88 ## Log Prob89 if not self.is_reparameterized:90 _mean.stop_gradient = True91 _std.stop_gradient = True92 logstd = paddle.log(_std)93 c = -0.5 * np.log(2 * np.pi)94 precision = paddle.exp(-2 * logstd)95 log_prob = c - logstd - 0.5 * precision * paddle.square(sample - _mean)96 # log_prob = fluid.layers.reduce_sum(log_prob, dim=-1)97 log_prob.stop_gradient = False...
normalize_utils.py
Source:normalize_utils.py
...17 self._mean = self._oldmean + (x-self._mean)/self._n18 self._var = self._var + (x - self._oldmean)*(x - self._mean)19 self._lock.release()20 return np.clip((x-self.mean)/(self.std+1e-5),-self._clipvalue,self._clipvalue)21 def normalize_without_mean(self,x):22 if self._lock.acquire():23 x = np.asarray(x)24 assert x.shape == self._mean.shape25 self._n += 126 self._oldmean = np.array(self._mean)27 self._mean = self._oldmean + (x-self._mean)/self._n28 self._var = self._var + (x - self._oldmean)*(x - self._mean)29 self._lock.release()30 return np.clip((x)/(self.std+1e-5),-self._clipvalue,self._clipvalue)31 @property32 def n(self):33 return self._n34 @property35 def mean(self):36 return self._mean37 @property38 def var(self):39 return self._var if self._n > 1 else np.square(self._mean)40 @property41 def std(self):42 if self.n <= 1:43 return np.sqrt(np.abs(self._mean))44 return np.sqrt(self.var/self.n)45 @property46 def shape(self):47 return self._mean.shape48class Running_Reward_Normalizer(object):49 def __init__(self,shape,lock,clipvalue=5):50 self._lock = lock51 self._clipvalue = clipvalue 52 self._n = 053 self._mean = np.zeros(shape) 54 self._var = np.zeros(shape)55 def store(self,rewards,gamma):56 if self._lock.acquire():57 x = discount_cumsum(rewards,gamma)[0]58 x = np.asarray(x)59 assert x.shape == self._mean.shape60 self._n += 161 self._oldmean = np.array(self._mean)62 self._mean = self._oldmean + (x-self._mean)/self._n63 self._var = self._var + (x - self._oldmean)*(x - self._mean)64 self._lock.release()65 def normalize(self,x):66 if self.n < 1:67 return x68 return np.clip((x-self.mean)/(np.clip(self.std,0,100)+1e-5),-self._clipvalue,self._clipvalue)69 def normalize_without_mean(self,x):70 if self.n < 1:71 return x72 return np.clip((x)/(np.clip(self.std,0,100)+1e-5),-self._clipvalue,self._clipvalue)73 @property74 def n(self):75 return self._n76 @property77 def mean(self):78 return self._mean79 @property80 def var(self):81 return self._var if self._n > 1 else 
np.square(self._mean)82 @property83 def std(self):...
Learn to execute automation testing from scratch with the LambdaTest Learning Hub. Right from setting up the prerequisites and running your first automation test, to following best practices and diving deeper into advanced test scenarios, the LambdaTest Learning Hub compiles step-by-step guides to help you become proficient with different test automation frameworks, e.g. Selenium, Cypress, and TestNG.
You can also refer to the video tutorials on the LambdaTest YouTube channel to get step-by-step demonstrations from industry experts.
Get 100 automation test minutes FREE!!