import numpy, scipy.special, scipy.optimize, math, pylab

def build_model(N, n=100, mean=0, std=1, minn=None, minwidth=None):
    """
    Build bins and expected frequencies for a test against gaussian distribution.
    N - number of data points
    n - desired number of points in a bin
    mean, std - mean value and standard deviation of the distribution.
    minn - minimum number of points in a bin. Defaults to n-1.
    minwidth - minimum width of a bin if specified.
    
    Returns a tuple (model_frequencies, bin_boundaries).
    Boundaries always include +-infinity.
    """
    rt2 = math.sqrt(2)
    rt2pi = math.sqrt(2*math.pi)
    
    if N < 2*n:
        return numpy.array([-numpy.inf, numpy.inf]), numpy.array([1])
    
    if minn is None: minn = n-1
    if minn < 1: minn = 1

    bins = [0]
    
    while True:
        left = scipy.special.erfc(bins[-1]/rt2)/2*N
        if left < n + minn:
            bins.append(numpy.inf)
            break
        
        next = None
        if minwidth is not None:
            next = bins[-1] + minwidth/std
            freq = left - scipy.special.erfc(next/rt2)/2.*N
            if freq < minn:
                next = None
        
        if next is None:
            next, infodict, ier, mesg = scipy.optimize.fsolve(func = lambda x: left - scipy.special.erfc(x/rt2)/2.*N - n,
                x0 = bins[-1],
                fprime = lambda x: numpy.exp(-x**2) / rt2pi * N / 2,
                full_output=True, warning=False)
            if ier != 1:
                break
        
        bins.append(next)
    
    bins = numpy.array(bins, dtype=numpy.float64)
    freqs = -numpy.diff(scipy.special.erfc(bins/rt2)/2.) * N
    freqs = numpy.concatenate([freqs[-1::-1], freqs])

    bins = numpy.concatenate([-bins[-1:0:-1], bins])
    
#   freqs = freqs / numpy.sum(freqs) * N
    return freqs, bins * std + mean

def midbins(bins):
    "return an array of middles of the bins"
    bins = numpy.asarray(bins)
    return 0.5*(bins[:-1] + bins[1:])

def chi2(freqs, model_freqs, deg_freedom = 2):
    "calculate chi^2 for counting test. model_freqs^0.5 are used as std. devs."
    return sum((freqs - model_freqs)**2 / model_freqs) / (len(freqs) - deg_freedom)

def pdf(probs, bins):
    "return an estimate of probability density based on probabilities and bin boundaries"
    return probs / (bins[1:] - bins[:-1])

def plotpdf(freqs, bins, bars=True, color1 = '#33ff33', color2='#55ff55', label=None, *args, **vargs):
    """
    plot probability density histogram based on a set of frequencies/bin boundaries.
    If 'bars' is True, bars are plotted, alternating between 'color1' and 'color2',
    otherwise a line is plotted in 'color1'.
    """
    if len(bins) != len(freqs) + 1:
        raise ValueError('Mismatching bins and frequencies')

    N = numpy.sum(freqs)

    if not numpy.isfinite(bins[0]):
        bins = bins[1:]
        freqs = freqs[1:]
    
    if not numpy.isfinite(bins[-1]):
        bins = bins[:-1]
        freqs = freqs[:-1]

    left = bins[:-1]
    width = bins[1:] - bins[:-1]
    height = freqs / width / N
#    height = pdf(freqs / N, bins)

    if bars:
        a = pylab.bar(left = left[0::2], width = width[0::2], height = height[0::2], linewidth=0, color=color1, *args, **vargs)
        if label is not None:
            vargs['label'] = label
        return a + pylab.bar(left = left[1::2], width = width[1::2], height = height[1::2], linewidth=0, color=color2, *args, **vargs)
    else:
        if label is not None:
            vargs['label'] = label
        return pylab.plot(0.5 * (bins[1:] + bins[:-1]), height, color=color1, *args, **vargs)

def histogram(x, n=100):
    """
    Build a histogram with bins of similar population of 'n'
    (the last bin might be up to '2*n')
    
    x - data points
    n - desired number of points in a bin
    
    Returns a tuple (frequencies, boundaries), similar to 'numpy.histogram'.
    """        
    x = numpy.array(x).flatten()
    x.sort()
    
    if len(x) < 1:
        return [], [-numpy.inf, numpy.inf]
    
    start = 0
    bins = []
    freqs = []
    while start < len(x):
        bins.append(x[start])
        end = start + n
        if end >= len(x) - n:
            end = len(x)
        freqs.append(end - start)
        start = end
        
    bins.append(x[-1])
    del x
    return numpy.array(freqs), numpy.array(bins)

def gauss_freqs(bins, N, mean, std):
    norm_bins = (bins - mean) / std / (2.0)**0.5
    left = norm_bins[:-1]
    right = norm_bins[1:]
    freqs = numpy.zeros(len(left))
    ii = left >= 0
    freqs[ii] = scipy.special.erfc(left[ii]) - scipy.special.erfc(right[ii])
    ii = left < 0
    freqs[ii] = scipy.special.erfc(-right[ii]) - scipy.special.erfc(-left[ii])
    return freqs * 0.5 * N

def gauss_residuals(params, bins, freqs, N):
    mean, std = params
    model_freqs = gauss_freqs(bins, N, mean, std)
    return (model_freqs - freqs)**2 / model_freqs
#    return (model_freqs - freqs)**2

def fit_gaussian(data, n=100, guess_mean=None, guess_std=None):
    data = numpy.asarray(data).flatten()
    if guess_mean is None: guess_mean = numpy.mean(data)
    if guess_std is None: guess_std = numpy.std(data)

    freqs, bins = histogram(data, n)
#    freqs, bins = numpy.histogram(data, bins=n, new=True)
    
    bins2 = bins.copy()
    bins2[0] = -numpy.inf
    bins2[-1] = numpy.inf
    
    res = scipy.optimize.leastsq(gauss_residuals, [guess_mean, guess_std], args=(bins2, freqs, len(data)), xtol=1e-15, ftol=1e-15, maxfev=int(1e6))
    mean, std = res[0]
    model_freqs = gauss_freqs(bins2, len(data), mean, std)
    
    return GaussianFit(mean, std, bins, freqs, model_freqs, chi2(freqs, model_freqs, 2))

class GaussianFit(object):
    def __init__(self, mean, std, bins, freqs, model_freqs, chi2):
        self.mean = mean
        self.std = std
        self.bins = bins
        self.freqs = freqs
        self.model_freqs = model_freqs
        self.chi2 = chi2

    def pdf(self, x): return 1.0/(2*numpy.pi)**0.5 / self.std * numpy.exp(-0.5 * (x - self.mean)**2 / self.std**2)
    
    def plot_data(self, color1 = '#33ff33', color2='#55ff55', label=None, *args, **vargs):
        return plotpdf(self.freqs, self.bins, bars=True, label=label, color1=color1, color2=color2)

    def plot_model(self, color = 'r', label=None, *args, **vargs):
        return plotpdf(self.model_freqs, self.bins, bars=False, label=label, color1=color)

    def plot_pdf(self, xx=None, *args, **vargs):
        if xx is None:
            xx = numpy.linspace(min(self.bins[numpy.isfinite(self.bins)]), max(self.bins[numpy.isfinite(self.bins)]), 256)
        return pylab.plot(xx, self.pdf(xx), *args, **vargs)

    @staticmethod
    def build(x, n=100):
        x = numpy.asarray(x).flatten()
        mean = numpy.mean(x)
        std = numpy.std(x)
        model_freqs, bins = build_model(len(x), n, mean, std)        
        freqs, bins = numpy.histogram(x, bins, new=True)
        
        return GaussianFit(mean, std, bins, freqs, model_freqs, chi2(freqs, model_freqs, 2))
