Source code for myutils

""" These utility functions may or may not be related to the oPDF method. Some of them are just general-purpose plotting or monitoring functions.
"""
import sys
import numpy as np
from scipy.stats import gaussian_kde,norm,chi2
import matplotlib
#matplotlib.user('Agg')
import matplotlib.pyplot as plt
from matplotlib.patches import Ellipse
from matplotlib.ticker import MaxNLocator # added 

[docs]def create31fig(sharex=True, sharey=False, figsize=(8,8)):
    '''create a figure with 3 tightly packed subplots'''
    f,ax = plt.subplots(3, sharex=sharex, sharey=sharey, figsize=figsize)
    f.subplots_adjust(hspace=0)
    plt.setp([a.get_xticklabels() for a in f.axes[:-1]], visible=False)
    nbins = len(ax[0].get_yticklabels())
    plt.setp([a.yaxis for a in ax[:-1]], major_locator=MaxNLocator(nbins=nbins, prune='lower',symmetric=True))
    return ax

[docs]def shiftedColorMap(cmap, start=0, midpoint=0.5, stop=1.0, name='shiftedcmap'):
    '''
    Function to offset the "center" of a colormap. Useful for
    data with a negative min and positive max and you want the
    middle of the colormap's dynamic range to be at zero

    Input
    
      cmap : The matplotlib colormap to be altered
      
      start : Offset from lowest point in the colormap's range.
          Defaults to 0.0 (no lower ofset). Should be between
          0.0 and `midpoint`.
      
      midpoint : The new center of the colormap. Defaults to 
          0.5 (no shift). Should be between 0.0 and 1.0. In
          general, this should be  1 - vmax/(vmax + abs(vmin))
          For example if your data range from -15.0 to +5.0 and
          you want the center of the colormap at 0.0, `midpoint`
          should be set to  1 - 5/(5 + 15)) or 0.75
      
      stop : Offset from highets point in the colormap's range.
          Defaults to 1.0 (no upper ofset). Should be between
          `midpoint` and 1.0.
      
      Credit: http://stackoverflow.com/questions/7404116/defining-the-midpoint-of-a-colormap-in-matplotlib
    '''
    cdict = {
        'red': [],
        'green': [],
        'blue': [],
        'alpha': []
    }

    # regular index to compute the colors
    reg_index = np.linspace(start, stop, 257)

    # shifted index to match the data
    shift_index = np.hstack([
        np.linspace(0.0, midpoint, 128, endpoint=False), 
        np.linspace(midpoint, 1.0, 129, endpoint=True)
    ])

    for ri, si in zip(reg_index, shift_index):
        r, g, b, a = cmap(ri)

        cdict['red'].append((si, r, r))
        cdict['green'].append((si, g, g))
        cdict['blue'].append((si, b, b))
        cdict['alpha'].append((si, a, a))

    newcmap = matplotlib.colors.LinearSegmentedColormap(name, cdict)
    plt.register_cmap(cmap=newcmap)

    return newcmap
  
[docs]def plot_circle(cen=[0,0], r=1, **kwargs):
	'''plot a circle'''
	phi=np.arange(0,2*np.pi+0.11,0.1)
	x=cen[0]+r*np.cos(phi)
	y=cen[1]+r*np.sin(phi)
	h=plt.plot(x,y,**kwargs)
	return h

def lnADPDF(lnAD):
  gpar=[[0.569,   -0.570,    0.511], [ 0.431,    0.227,    0.569]] #w, mu, sigma, bi-normal fit
  p=gpar[0][0]*norm.pdf(lnAD,gpar[0][1],gpar[0][2])+gpar[1][0]*norm.pdf(lnAD, gpar[1][1], gpar[1][2])
  return p
  
[docs]def ADSurvFunc(AD):
  '''survival function for AD test on uniform distributions'''
  gpar=[[0.569,   -0.570,    0.511], [ 0.431,    0.227,    0.569]] #w, mu, sigma, bi-normal fit
  lnAD=np.log(AD)
  p=gpar[0][0]*norm.sf(lnAD,gpar[0][1],gpar[0][2])+gpar[1][0]*norm.sf(lnAD, gpar[1][1], gpar[1][2])
  return p
  #return norm.sf(np.log(AD),loc=-0.22,scale=0.66);

[docs]def P2Sig(pval):
  """convert pval to sigma"""
  #return norm.ppf(1.0-pval/2)
  return -norm.ppf(pval/2) #equivalent to the above, but more accurate

[docs]def AD2Sig(AD):
  """convert AndersonDarling TS to sigma"""
  AD=np.array(AD)
  sig=P2Sig(ADSurvFunc(AD))
  #sig[AD>5]=(np.log(AD[AD>5])+0.22)/0.66
  return sig

[docs]def Sig2TS(sig,dof=1):
  '''convert a sigma value to the 2*likelihood ratio'''
  return chi2.ppf(chi2.cdf(np.array(sig)**2,1),dof)

[docs]def Chi2Sig(x, dof):
  '''convert chi-square value to significance level, for dof degrees of freedom'''
  return P2Sig(chi2.sf(x,dof))
  
[docs]class ProgressMonitor:
	"""monitor progress of your loops"""
	def __init__(self,total_steps, total_show=100, init_show=0):
		"""init_show: initial value progress percentage, set to 0 if no reason
		total_steps: maximum iteration steps to monitor
		total_show: number of revealing times
		"""
		self.current_show=int(init_show)
		self.total_steps=total_steps
		self.total_show=total_show
		print " %02d%%"%(self.current_show*100/self.total_show),
		sys.stdout.flush()
		
[docs]	def monitor_progress(self,current_step):
		"""put this inside loop to monitor progress, to print the percent of
		job finished."""
		#print when current_step first exceeds current show percent
		if current_step>=self.total_steps*self.current_show/self.total_show:
			print "\b\b\b\b\b %02d%%"%(self.current_show*100/self.total_show),
			sys.stdout.flush()
			self.current_show+=1
			
[docs]def percent2level(p,z):
    ''' convert percentiles to levels '''
    try:
      p=list(p)
    except: #single number
      p=[p]
    x=np.sort(np.array(z.ravel()))
    x=x[::-1]
    frac=x.cumsum()/x.sum()
    l=[x[abs(frac-pi).argmin()] for pi in p]
    return l

[docs]def contour_handle(color, linestyle='solid'):
  '''return a patch object to be used for labelling patch objects in legends'''
  return Ellipse((0,0),0,0,fill=False, color=color, linestyle=linestyle)  

[docs]def density_of_points(data, bins=100, method='kde', weights=None):
  ''' estimate density of points with kde or histogram2d 
  
  data: should be shape [2,n] array
  
  bins: can be an integer or [nx,ny] for number of bins, an ndarray or a list of two arrays  for bin edges
  
  method: 'kde' or 'hist', kernel-density-estimate or 2d-histogram estimate
  
  weights: whether to use weights or not. currently only supports hist method.
  
  return: (X,Y,Z)
	  ready to be used for contour plots as contour(X, Y, Z). 
	  X and Y are mid points of the bins on which Z is calculated.
  '''
  if data.shape[0]!=2 and data.shape[1]==2:
	data=data.T
  l=data.min(axis=1)
  r=data.max(axis=1)
  if isinstance(bins, int):  
	  x=np.linspace(l[0], r[0], bins+1)
	  y=np.linspace(l[1], r[1], bins+1)
  elif isinstance(bins, np.ndarray):
	  x=bins
	  y=bins
  elif isinstance(bins, list):
	if isinstance(bins[0], int):
	  x=np.linspace(l[0], r[0], bins[0]+1)
	  y=np.linspace(l[1], r[1], bins[1]+1)
	else:  
	  x=bins[0]
	  y=bins[1]
  
  X,Y=np.meshgrid((x[:-1]+x[1:])/2, (y[:-1]+y[1:])/2) #mid points
  if method=='kde':
	  positions =np.vstack([X.ravel(), Y.ravel()])
	  kernel = gaussian_kde(data)
	  Z = np.reshape(kernel(positions).T, X.shape)
  else:
	  Z=np.histogram2d(data[0], data[1], bins=[x, y], weights=weights)
	  Z=Z[0].T
  return X,Y,Z
	  
[docs]def percentile_contour(X,Y,Z, percents=0.683, colors=None, fill=False, linestyles='solid', **kwargs):
  """
  plot contour at specific percentile levels

	  X,Y can be both 2-d arrays as Z, or 1-d array specifying the column(horizontally varying) and row coordinates for Z.
  
	  percents can be a list, specify the contour percentile levels
  
	  colors should be a tuple, e.g, (r,) 
  
	  fill: bool, whether to plot filled contours
  
	  kwargs specify linestyles
  
  return:
	  a handle artist of the same linestyle (but not the contour object) to be used in legends
  """    
  #if type is 'image':
	#extent=(x.min()-(x[1]-x[0])/2, x.max()+(x[1]-x[0])/2, y.min()-(y[1]-y[0])/2, y.max()+(y[1]-y[0])/2)
	#if colors is None:
	  #colors=plt.cm.summer
	#h0=plt.imshow(Z, extent=extent, cmap=colors)
  #else:
  lvls=percent2level(percents,Z)
  if fill:
	h0=plt.contourf(X,Y,Z,lvls, colors=colors, linestyles=linestyles, **kwargs)
  else:
	h0=plt.contour(X,Y,Z,lvls, colors=colors, linestyles=linestyles, **kwargs)

  try:
	color=list(colors)[0]
  except:
	color=colors
  h=Ellipse((0,0),0,0,fill=fill, color=color, linestyle=linestyles, **kwargs)
  
  return h,h0,lvls

[docs]def get_extent(X,Y):
  ''' get extent for X,Y vectors or meshgrids. 
  
  the output is (xmin,xmax,ymin,ymax), the edge-padded boudaries,
      assuming X,Y specifies the mid points of bins and uniformly spaced.
      
  can be used to specify extent for imshow()'''
  if X.squeeze().ndim==2:
	dx=X[0,1]-X[0,0]
	dy=Y[1,0]-Y[0,0]
  else:
	dx=X[1]-X[0]
	dy=Y[1]-Y[0]
  extent=(X.ravel().min()-dx/2, X.ravel().max()+dx/2, Y.ravel().min()-dy/2, Y.ravel().max()+dy/2)
  return extent

[docs]def plot_cov_ellipse(cov, pos, nstd=1, fill=False, ax=None, **kwargs):
    """
    Plots an `nstd` sigma error ellipse based on the specified covariance
    matrix (`cov`). Additional keyword arguments are passed on to the 
    ellipse patch artist.

    Parameters
    
        cov : The 2x2 covariance matrix to base the ellipse on
        
        pos : The location of the center of the ellipse. Expects a 2-element
            sequence of [x0, y0].
        
        nstd : The radius of the ellipse in numbers of standard deviations.
            Defaults to 1 standard deviations.
        
        ax : The axis that the ellipse will be plotted on. Defaults to the current axis.
        
        Additional keyword arguments are pass on to the ellipse patch.

    Returns
        A matplotlib ellipse artist
    """
    def eigsorted(cov):
        vals, vecs = np.linalg.eigh(cov)
        order = vals.argsort()[::-1]
        return vals[order], vecs[:,order]

    if ax is None:
        ax = plt.gca()

    TS={1:2.3, 2:6.18, 3:11.8} #the TS=x'* at the specific nstd, for a 2-d
    vals, vecs = eigsorted(cov)
    theta = np.degrees(np.arctan2(*vecs[:,0][::-1]))

    # Width and height are "full" widths, not radius
    width, height = 2 * np.sqrt(TS[nstd]*vals)
    ellip = Ellipse(xy=pos, width=width, height=height, angle=theta, fill=fill, **kwargs)
    #ellip.set_facecolor('none')
    ax.add_artist(ellip)
    return ellip
  
[docs]def skeleton(x,y,nbin=10,alpha=0.683,weights=None):
	"""
	to divide x into bins and give estimation of center and variance of y inside each bin
	
	input:
		x,y: column vectors to extract skeleton from
		
		nbin: number of bins or bin edges for x
		
		alpha: confidence level for boundary estimation
	"""

	x=np.array(x)
	y=np.array(y)
	
	count,xbin=np.histogram(x,nbin,weights=weights)
	nbin=len(xbin)-1
	bin=np.digitize(x,xbin)-1
	
	xm=np.empty(nbin)
	#ym=xm[:]     #this is wrong! even though id(ym)!=id(xm), and id(ym[0])!=id(xm[0])
	ym=np.empty_like(xm)
	ysig=np.empty_like(xm)
	xmed=np.empty_like(xm)
	ymed=np.empty_like(xm)
	ylim=np.empty([2,nbin]);
	alpha=(1-alpha)/2;
	
	for i in xrange(nbin):
		if weights is not None:
		  xm[i]=np.sum(x[bin==i]*weights[bin==i])/np.sum(weights[bin==i])
		  ym[i]=np.sum(y[bin==i]*weights[bin==i])/np.sum(weights[bin==i])
		xm[i]=np.mean(x[bin==i])
		xmed[i]=np.median(x[bin==i])
		ym[i]=np.mean(y[bin==i])
		ymed[i]=np.median(y[bin==i])
		ysig[i]=np.std(y[bin==i])
		if np.sum(bin==i):
			ylim[:,i]=np.percentile(y[bin==i], [alpha*100, (1-alpha)*100])
		else:
			ylim[:,i]=[np.NaN,np.NaN]
			
	return {'x':{'median':xmed,'mean':xm,'bin':xbin,'hist':count},'y':{'median':ymed,'mean':ym,'std':ysig,'CI':ylim}}

class Enum(object):
  def __init__(self, names):
    for number, name in enumerate(names.split()):
      setattr(self, name, number)
      
class NamedValues(object):
  def __init__(self, value, name):
	self.value=value
	self.name=name
  def __repr__(self):
	return self.name
  def __str__(self):
	return self.name  

class NamedEnum(object):
  def __init__(self, names):
	'''create a named enum object from a list of empty-space separated names'''
	for number, name in enumerate(names.split()):
	  setattr(self, name, NamedValues(number, name))

#import ctypesGsl as cgsl
#def fmin_gsl(func, x0, args=[], xtol=1e-3, ftolabs=0.01, xstep=1.0, maxiter=1000, full_output=False):
    #'''
    #minimize function with gsl_simplex method
    #func(x [,args]): function to be minimized
    #x0: [a,b,c...], initial parameter
    #args: list of additional parameter if any
    #xstep: initial simplex size
    #mimics scipy.optimize.fmin() interface
    #this fmin() is faster and more accurate than the scipy.optimize.fmin(), also better than fmin_powell() in scipy.
    #'''
    #args=list(args)
    #if args is []:
	  #myfunc=lambda x,arg: func(x)
    #else:
	  #myfunc=lambda x,arg: func(x, *arg)
    #F = cgsl.gsl_multimin_function(myfunc, len(x0), args)
    #x = cgsl.vector(x0)
    #T = cgsl.multimin_fminimizer_nmsimplex
    #s = cgsl.multimin_fminimizer(T, F)
    #s.init(x, cgsl.vector([xstep] * F.n))
    #it = 0
    #f1=F(x)
    #while True:
        #it += 1
        #s.iterate()
        #f0=f1
        #f1=s.minimum()
        #status = s.test_size(xtol)
        #xx = s.x()
        #if status and abs(f1-f0)<ftolabs:
            #print "Optimization terminated successfully."
            #print "\t Current function value: ", f1
            #print "\t Iterations: ", it
            #print "\t x abs err: ", s.size()
            #print "\t", xx
            #status=0
            #break
        #if it >= maxiter:
            #print "Maximum number of %d iterations reached"%maxiter
            #print "Failed to converge"
            #status=1
            #break
    #x=np.array([xx[i] for i in xrange(F.n)])
    #if full_output:
      #return x,f1,it,status
    #else:
      #return x