38 | 38 |     "ZeroInflatedNegativeBinomial",
39 | 39 |     "DiscreteUniform",
40 | 40 |     "Geometric",
| 41 | +    "HyperGeometric",
41 | 42 |     "Categorical",
42 | 43 |     "OrderedLogistic",
43 | 44 | ]
@@ -809,6 +810,118 @@ def logp(self, value):
809 | 810 |         return bound(tt.log(p) + logpow(1 - p, value - 1), 0 <= p, p <= 1, value >= 1)
810 | 811 |
811 | 812 |
| 813 | +class HyperGeometric(Discrete):
| 814 | +    R"""
| 815 | +    Discrete hypergeometric distribution.
| 816 | +
| 817 | +    The probability of :math:`x` successes in :math:`n` draws taken without
| 818 | +    replacement from a population of :math:`N` objects, of which :math:`k` are
| 819 | +    good (successful, Type I) objects.
| 820 | +    The pmf of this distribution is
| 821 | +
| 822 | +    .. math:: f(x \mid N, n, k) = \frac{\binom{k}{x}\binom{N-k}{n-x}}{\binom{N}{n}}
| 823 | +
| 824 | +    .. plot::
| 825 | +
| 826 | +        import matplotlib.pyplot as plt
| 827 | +        import numpy as np
| 828 | +        import scipy.stats as st
| 829 | +        plt.style.use('seaborn-darkgrid')
| 830 | +        x = np.arange(1, 15)
| 831 | +        N = 50
| 832 | +        k = 10
| 833 | +
| 834 | +        for n in [20, 25]:
| 835 | +            pmf = st.hypergeom.pmf(x, N, k, n)
| 836 | +            plt.plot(x, pmf, '-o', label='n = {}'.format(n))
| 837 | +        plt.title('N = {}, k = {}'.format(N, k))
| 838 | +        plt.xlabel('x', fontsize=12)
| 839 | +        plt.ylabel('f(x)', fontsize=12)
| 840 | +        plt.legend(loc=1)
| 841 | +        plt.show()
| 842 | +
| 843 | +    ========  =============================
| 844 | +    Support   :math:`x \in \left[\max(0, n - N + k), \min(k, n)\right]`
| 845 | +    Mean      :math:`\dfrac{nk}{N}`
| 846 | +    Variance  :math:`\dfrac{(N-n)nk(N-k)}{(N-1)N^2}`
| 847 | +    ========  =============================
| 848 | +
| 849 | +    Parameters
| 850 | +    ----------
| 851 | +    N : integer
| 852 | +        Total size of the population
| 853 | +    k : integer
| 854 | +        Number of successful individuals in the population
| 855 | +    n : integer
| 856 | +        Number of samples drawn from the population
| 857 | +    """
| 858 | +
| 859 | +    def __init__(self, N, k, n, *args, **kwargs):
| 860 | +        super().__init__(*args, **kwargs)
| 861 | +        self.N = intX(N)
| 862 | +        self.k = intX(k)
| 863 | +        self.n = intX(n)
| 864 | +        self.mode = intX(tt.floor((n + 1) * (k + 1) / (N + 2)))  # closed-form mode of the hypergeometric
| 865 | +
| 866 | +    def random(self, point=None, size=None):
| 867 | +        r"""
| 868 | +        Draw random values from HyperGeometric distribution.
| 869 | +
| 870 | +        Parameters
| 871 | +        ----------
| 872 | +        point : dict, optional
| 873 | +            Dict of variable values on which random values are to be
| 874 | +            conditioned (uses default point if not specified).
| 875 | +        size : int, optional
| 876 | +            Desired size of random sample (returns one sample if not
| 877 | +            specified).
| 878 | +
| 879 | +        Returns
| 880 | +        -------
| 881 | +        array
| 882 | +        """
| 883 | +
| 884 | +        N, k, n = draw_values([self.N, self.k, self.n], point=point, size=size)
| 885 | +        return generate_samples(self._random, N, k, n, dist_shape=self.shape, size=size)
| 886 | +
| 887 | +    def _random(self, M, n, N, size=None):
| 888 | +        r"""Wrapper around scipy.stats.hypergeom.rvs; scipy's (M, n, N) are this class's (N, k, n)."""
| 889 | +        try:
| 890 | +            samples = stats.hypergeom.rvs(M=M, n=n, N=N, size=size)
| 891 | +            return samples
| 892 | +        except ValueError:
| 893 | +            raise ValueError("Domain error in arguments")
| 894 | +
| 895 | +    def logp(self, value):
| 896 | +        r"""
| 897 | +        Calculate log-probability of HyperGeometric distribution at specified value.
| 898 | +
| 899 | +        Parameters
| 900 | +        ----------
| 901 | +        value : numeric
| 902 | +            Value(s) for which log-probability is calculated. If the log probabilities for multiple
| 903 | +            values are desired the values must be provided in a numpy array or theano tensor.
| 904 | +
| 905 | +        Returns
| 906 | +        -------
| 907 | +        TensorVariable
| 908 | +        """
| 909 | +        N, k, n = self.N, self.k, self.n
| 910 | +        tot, good = N, k
| 911 | +        bad = tot - good
| 912 | +        # log C(k, x) + log C(N - k, n - x) - log C(N, n), written with betaln
| 913 | +        result = (
| 914 | +            betaln(good + 1, 1)
| 915 | +            + betaln(bad + 1, 1)
| 916 | +            + betaln(tot - n + 1, n + 1)
| 917 | +            - betaln(value + 1, good - value + 1)
| 918 | +            - betaln(n - value + 1, bad - n + value + 1)
| 919 | +            - betaln(tot + 1, 1)
| 920 | +        )
| 921 | +        # -inf outside the support [max(0, n - N + k), min(k, n)]
| 922 | +        return bound(result, tt.maximum(n - N + k, 0) <= value, value <= tt.minimum(k, n))
| 923 | +
| 924 | +
812 | 925 | class DiscreteUniform(Discrete):
813 | 926 |     R"""
814 | 927 |     Discrete uniform distribution.
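For readers checking the `logp` algebra: each pair of `betaln` terms is a log-binomial coefficient via the identity log C(a, b) = -log(a + 1) - betaln(a - b + 1, b + 1), so `result` reduces to log C(k, x) + log C(N - k, n - x) - log C(N, n), i.e. the pmf from the docstring. A small standalone sketch of that check against `scipy.stats.hypergeom.logpmf` (not part of the diff; the helper name `hypergeom_logp` and the values N = 50, k = 10, n = 20 are only for illustration):

```python
import numpy as np
from scipy import stats
from scipy.special import betaln

def hypergeom_logp(value, N, k, n):
    # Same algebra as the logp method above, evaluated with NumPy/SciPy:
    # each betaln pair is a log-binomial coefficient,
    # log C(a, b) = -log(a + 1) - betaln(a - b + 1, b + 1).
    tot, good = N, k
    bad = tot - good
    return (
        betaln(good + 1, 1)
        + betaln(bad + 1, 1)
        + betaln(tot - n + 1, n + 1)
        - betaln(value + 1, good - value + 1)
        - betaln(n - value + 1, bad - n + value + 1)
        - betaln(tot + 1, 1)
    )

x = np.arange(0, 11)  # full support for N = 50, k = 10, n = 20
print(np.allclose(hypergeom_logp(x, N=50, k=10, n=20),
                  stats.hypergeom.logpmf(x, 50, 10, 20)))  # expected: True
```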
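Similarly, the `self.mode` assignment in `__init__` uses the closed-form hypergeometric mode, floor((n + 1)(k + 1) / (N + 2)). A quick illustrative check against the argmax of the SciPy pmf (the parameter values are arbitrary, chosen to match the docstring plot):

```python
import numpy as np
from scipy import stats

N, k, n = 50, 10, 20
support = np.arange(max(0, n - N + k), min(k, n) + 1)
pmf = stats.hypergeom.pmf(support, N, k, n)

mode_closed_form = int(np.floor((n + 1) * (k + 1) / (N + 2)))
mode_numeric = int(support[np.argmax(pmf)])
print(mode_closed_form, mode_numeric)  # both 4 for these parameters
```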
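Finally, a minimal usage sketch of the new class, assuming a build of this branch where `HyperGeometric` is exported through the `__all__` entry added above. The capture-recapture setup, the data, and the sampler settings are illustrative assumptions, not taken from the PR:

```python
import numpy as np
import pymc3 as pm

# A population of N = 50 animals contains an unknown number k of tagged animals.
# Each survey draws n = 20 animals without replacement and counts the tagged ones.
tagged_counts = np.array([5, 4, 6, 5])

with pm.Model():
    k = pm.DiscreteUniform("k", lower=0, upper=50)
    pm.HyperGeometric("obs", N=50, k=k, n=20, observed=tagged_counts)
    # All free variables are discrete, so PyMC3 assigns a Metropolis step.
    trace = pm.sample(2000, tune=1000, cores=1)

print(trace["k"].mean())
```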