Source code for physoce.stats

import numpy as np
import numpy.ma as ma
from scipy import stats as st

[docs] def nancorr(x,y): """ r = nancorr(x,y) Calculate correlation matrix, treating NaN values as missing data """ x_msk = ma.masked_invalid(x) y_msk = ma.masked_invalid(y) r = ma.corrcoef(x_msk,y_msk) return r
[docs] def maxcorr(x,y,**options): """ (rmax,lag,ind) = maxcorr(x,y,**'maxlag'=int(len(x)/4)): Calculate the maximum lagged correlation between two 1D arrays Inputs: x,y are 1D arrays Options 'maxlag' the maximum number of lagged correlations to calculate (default: 1/4 of array length) Output: r is the correlation coefficient with the maximum absolute value lag is the lag of the maximum correlation (positive: y lags x) """ nrows = len(x) maxlag = int(np.floor(nrows/4)) if ('maxlag' in options): maxlag = options['maxlag'] # use masked arrays (mask NaNs) x = ma.masked_invalid(x) y = ma.masked_invalid(y) lags = np.arange(-maxlag,maxlag+1) rs = np.zeros(np.shape(lags)) for ni, lag in enumerate(lags): lag = lags[ni] if lag < 0: rs[ni] = ma.corrcoef(x[-lag:],y[:lag])[0,1] elif lag > 0: rs[ni] = ma.corrcoef(x[:-lag],y[lag:])[0,1] else: rs[ni] = ma.corrcoef(x,y)[0,1] ind = ma.argmax(np.abs(rs)) rmax = rs[ind] lag = lags[ind] return (rmax,lag,ind)
[docs] def rsig(r,nu): """ p-value for correlation coefficient r, and degrees of freedom nu INPUTS: r - correlation coefficient nu - degrees of freedom (N-2) OUTPUT: p - significance level/p-value significance levels of 0.05 and 0.01 correspond with Appendix E in Emery and Thomson (2004) Data Analysis Methods in Physical Oceanography """ # t value t = abs(r)*np.sqrt(nu)/np.sqrt(1-r**2) # significance level, using the "survival function" (1-cdf) p = 2*(st.t.sf(t,nu)) return p
[docs] def rcrit(nu,sig=0.05): """ Critical r (correlation coefficient), given significance level and degrees of freedom. INPUTS: nu - degrees of freedom (N-2) sig - significance level (default 0.05) OUTPUT: rcrit - critical r value Values for 0.05 and 0.01 correspond with Appendix E in Emery and Thomson (2004) Data Analysis Methods in Physical Oceanography """ # critical t value (this is equivalent to Matlab tinv function) t = st.t.ppf(1 - sig/2,nu) # critical r value rc = t/np.sqrt(t**2+nu) return rc
[docs] def linreggm(x,y): ''' Geometric mean (Type II) linear regression. Returns slope and intercept where y = slope*x + intercept References: - Ricker (1984) Computation and uses of central trend lines, Can. J. Zool., 62, 1897-1905. - Glover, Jenkins and Doney (2011) Modeling Methods for Marine Science. Section 3.2.5, Cambridge University Press. ''' # least-squares regression of y on x result_yx = st.linregress(x,y) # least-squares regression of x on y result_xy = st.linregress(y,x) slope = np.sqrt(result_yx.slope/result_xy.slope) slope = slope*np.sign(result_yx.slope) xbar = np.mean(x) ybar = np.mean(y) intercept = ybar - slope*xbar return slope,intercept