This is basically a continuation of the documentation of Roman’s improved method of combining temperature anomalies. First there are several plots which show the difference between the simple method – averaging the anomalies – used universally in climate science, overlaid on the improved method.
Figure 1 - Two plots of global temperature anomaly. Red is the standard method used in climate science, and black is the improved method.
Note the slightly higher trend in the black method. This is created by the required offset corrections for incomplete temperature stations. Looking back, it’s kind of funny, I wouldn’t even calculate an Antarctic trend without the offsets. Nobody had to tell me, hey Jeff you should….. it’s obvious!! They are a conceptual necessity in calculation of a global trend. The more I consider it, the more idiotic it seems that climate ‘science’ doesn’t do it.
Below is the difference in trend according to these new methods. Please don’t pay too much attention to the confidence intervals because the data is filtered.
On the last thread Chip Knappenberger said:
The land-only CRU dataset has a trend of about 0.224C/decade since 1978 [edit], which is not all that much different from your 0.248C/dec.
Which is pretty cool. The difference between the corrected offset anomaly average and the standard is basically the same thing.
How cool is that! I mashed together global trends using my own standard method in Figure 3 and came within a few hundredths of the pros’ value. Like sea ice, this is not too bad.
Roman’s method is so obviously better (Figure 4), as is the method I used for the Antarctic trend, but they are not used by the pros. It’s slightly higher in trend, as we would expect.
It’s too much, it has to be said — the warmistas, who would trade their grandmas for a few tenths of a degree, didn’t think of the most obvious and correct method to calc temp anomaly. I’m not even talking about Roman’s mastery of the subject but just about offsetting two anomalies to match each other. The method always increases trend — but it is correct!! Wow, in Mann08, I was convinced that the idiocy was fraud; this time, I think it’s just incompetence. It is like a whole new world has opened up.
Let’s leave my overactive (and over-vocal) thoughts for another post though. I promised the complete code last night and nobody called me out on it. I’ve attached everything except some plotting functions below. The plotting functions are not required for verification and they do add a lot of false complexity to the appearance. One of my goals is to get people to take a crack at using R and actually running this stuff. It’s not so heady that you can’t do it, and there is a ton to learn. If you need the plotting code, leave a comment.
###################################################
## gridded global trend calculation with offset####
###################################################
#### load external functions filtering used
# Provides filter.combine.pad and truncated.gauss.weights used by ff(); the
# calc.anom() called near the bottom of this script presumably also comes
# from here -- TODO confirm against the sourced script.
source("http://www.climateaudit.info/scripts/utilities.txt") #Steve McIntyre
###################################################
######## data load ################################
###################################################
# GHCN v2 mean-temperature records, fixed-width. Column layout inferred from
# the widths and from how getsinglestation() uses the columns below:
# country (3), station id (5), modifier/version (3), duplicate number (1),
# year (4), then 12 monthly values (5 chars each, tenths of deg C, -9999=NA).
# NOTE(review): verify against the official GHCN v2 format documentation.
loc="C:/agw/R/GHCN/v2.mean."
wd=c(3,5,3,1,4,5,5,5,5,5,5,5,5,5,5,5,5)
ghmean=read.fwf(loc,widths=wd,comment.char="")
# country code table: 3-char code plus 38-char country name
loc="C:/agw/R/GHCN/v2.country.codes"
wd=c(3,38)
ccode=read.fwf(loc,widths=wd)
# station inventory, one row per station; columns 5 and 6 are used below as
# latitude and longitude when assigning stations to grid cells
loc="C:/agw/R/GHCN/v2.temperature.inv"
wd=c(3,5,4,30,8,7,4,6,1,10,3,2,16,1)
#wd=100
tinv=read.fwf(loc,widths=wd,comment.char="")
stationnum=nrow(tinv) #7280
# optional cache for the slow fixed-width reads above
#save(ghmean, ccode, stationnum,tinv ,file="GHCN data.RData")
#load("ghcn data.RData")
###################################################
######## functions ################################
###################################################
# many of these functions were written by Ryan O, SteveM, NicL, RomanM
### Gaussian filter: smooth a monthly series with an 11-point truncated
### Gaussian window and return the smoothed column.
### (filter.combine.pad and truncated.gauss.weights come from the sourced
### climateaudit utilities script; a 31-point window was used elsewhere.)
ff = function(x) {
  wts = truncated.gauss.weights(11)
  padded = filter.combine.pad(x, wts)
  padded[, 2]
}
### Sum of squares of a numeric vector
### (used in the SSE and trend-CI computations of the plotting helpers)
ssq=function(x) sum(x * x)
################################################
######## roman M offset series combination #####
################################################
##### version2.0
### subfunction to do pseudoinverse
### Moore-Penrose pseudoinverse via the SVD. Singular values below tol
### (default: machine epsilon scaled by the larger matrix dimension and the
### largest singular value) are treated as exactly zero.
psx.inv = function(mat, tol = NULL)
{
  # one-column input: closed form v / ||v||^2
  if (NCOL(mat) == 1) {
    return(mat/sum(mat^2))
  }
  decomp = svd(mat)
  sv = decomp$d
  if (is.null(tol)) {
    tol = max(NROW(mat), NCOL(mat))*max(sv)*.Machine$double.eps
  }
  # reciprocals of the retained singular values, zero for the discarded ones
  recip = ifelse(sv >= tol & sv > 0, 1/sv, 0)
  decomp$v %*% diag(recip, length(recip)) %*% t(decomp$u)
}
### subfunction to do offsets
### Estimate one additive offset per series (column of tdat) so that
### overlapping series agree in a least-squares sense, subject to the
### constraint that the wts-weighted sum of the offsets is zero.
### tdat: matrix/ts of series (rows = time, cols = series, NA = missing)
### wts:  one weight per series; also fixes nr, the constraint-row index
### Returns a column vector of offsets (nc x 1).
calcx.offset = function(tdat,wts)
{
## new version
nr = length(wts)
# availability indicator: TRUE where a series has an observation
delt.mat = !is.na(tdat)
# number of series reporting at each time step
delt.vec = rowSums(delt.mat)
row.miss= (delt.vec ==0)
# row-normalized availability (each row sums to 1); adding row.miss avoids
# division by zero on time steps with no data at all
delt2 = delt.mat/(delt.vec+row.miss)
# normal-equation system for the offsets
co.mat = diag(colSums(delt.mat)) - (t(delt.mat)%*% delt2)
co.vec = colSums(delt.mat*tdat,na.rm=T) - colSums(rowSums(delt.mat*tdat,na.rm=T)*delt2)
# the system is singular (offsets defined only up to a common constant), so
# replace the last row with the constraint sum(wts * offsets) = 0
co.mat[nr,] = wts
co.vec[nr]=0
# solve with the pseudoinverse for numerical robustness
psx.inv(co.mat)%*%co.vec
}
temp.combine = function(tsdat, wts=NULL, all=T)
{
### main routine
### Combine the columns of tsdat (a multivariate monthly ts, one series per
### column) into a single average series after removing a per-series,
### per-calendar-month offset computed by calcx.offset().
### wts: series weights for the zero-sum offset constraint (default equal).
### all=T also returns per-series fitted values and residuals.
### Returns list(temps, pred, residual, offsets).
nr = nrow(tsdat)
nc = ncol(tsdat)
dims = dim(tsdat)  # (unused)
if (is.null(wts)) wts = rep(1,nc)
wts=wts/sum(wts)  # normalize weights to sum to 1
# one offset per calendar month x series; solved month by month on the
# deltat=1 (annual-step) sub-series starting at month i
off.mat = matrix(NA,12,nc)
dat.tsp = tsp(tsdat)
for (i in 1:12) off.mat[i,] = calcx.offset(window(tsdat,start=c(dat.tsp[1],i), deltat=1), wts)
colnames(off.mat) = colnames(tsdat)
# NOTE(review): month.abb row labels assume the series starts in January
rownames(off.mat) = month.abb
# tile the 12 x nc offsets up to the full series length
matoff = matrix(NA,nr,nc)
for (i in 1:nc) matoff[,i] = rep(off.mat[,i],length=nr)
# combined series: simple mean across series after removing the offsets
temp = rowMeans(tsdat-matoff,na.rm=T)
pred=NULL
residual=NULL
if (all==T)
{
pred =c(temp) + matoff  # fitted value for every series
residual = tsdat-pred
}
list(temps = ts(temp,start=c(dat.tsp[1],1),freq=12),pred =pred, residual = residual, offsets=off.mat)
}
#pick out those series which have at least nn + 1 observations in every month
#Outputs a logical vector with TRUE indicating that that series is OK
dat.check = function(tsdat, nn=0)
{
  # For each column, count the non-missing observations in each of the 12
  # calendar months (by folding the column into a 12-row matrix) and flag the
  # column TRUE only when every month has more than nn observations.
  n.series = ncol(tsdat)
  good = logical(n.series)
  for (j in seq_len(n.series)) {
    month.counts = rowSums(!is.na(matrix(tsdat[, j], nrow = 12)))
    good[j] = min(month.counts) > nn
  }
  good
}
######################################################################
##### slope plotting function with modifications for 12 month anomaly
######################################################################
get.slope=function(dat)
{
## Fit a seasonal-dummy linear model to a monthly ts: one intercept per
## calendar month plus a single shared time trend (dat ~ 0 + month + time).
## Returns a 3-element list:
##   [[1]] coefficients: 12 monthly intercepts then the slope (deg C / year)
##   [[2]] residual monthly ts (seasonal cycle and trend removed)
##   [[3]] anomaly ts: residuals plus the centered trend component, so the
##         long-term slope is retained but the mean is ~0
## FIX(review): the original default `dat=dat` was self-referential (errors
## with "promise already under evaluation" if ever used) and an initial
## `anomaly=rep(NA,len)` was dead code, overwritten before use.
len=length(dat)
month = factor(rep(1:12,as.integer(len/12)+1))
month=month[1:len] #first month by this method is not necessarily jan
#the important bit is that there are 12 steps
time2=time(dat)
coes = coef(lm(dat~0+month+time2))
# per-month fitted lines: shared slope coes[13], month intercepts coes[1:12]
fitline=array(NA,dim=c(len,12))
for(i in 1:12)
{
fitline[,i]=coes[13]*time2+coes[i]
}
residual=ts(rep(NA,len),start=time2[1],deltat=1/12)
for(i in 1:len)
{
# month[i] is a factor; indexing uses its integer code, which matches 1:12
residual[i]=dat[i]-fitline[i,month[i]]
}
# add the trend back, centered, to form the anomaly series
anomaly = residual+time2*coes[13]-mean(time2*coes[13],na.rm=TRUE)
list(coes,residual,anomaly)
}
### plot average slope using roman's methods
plt.avg2=function(dat, st=1850, en=2011, y.pos=NA, x.pos=NA, main.t="Untitled") {
### Plot the deseasonalized anomaly of a monthly ts (via get.slope) with a
### least-squares trend line, annotated with the decadal slope and a 95% CI
### adjusted for AR(1) autocorrelation (Quenouille/Santer correction).
### st/en: window limits in years; x.pos/y.pos: optional label position.
### Get trend
fm=get.slope(window(dat, st, en))
### Initialize variables
N=length(window(dat, st, en))
I=seq(1:N)/frequency(dat)  # regressor in years (arbitrary origin)
rmI=ssq(I-mean(I))
### Calculate sum of squared errors
SSE=ssq(fm[[2]][!is.na(fm[[2]])])/(N-2)
### Calculate OLS standard errors (x10 converts per-year to per-decade)
seOLS=sqrt(SSE/rmI)*10
### Calculate two-tailed 95% CI
ci95=seOLS*1.964
### Get lag-1 ACF of the residuals
r1=acf(fm [[2]], lag.max=1, plot=FALSE,na.action=na.pass)$acf[[2]]
### Calculate CIs with Quenouille (Santer) adjustment for autocorrelation
Q=(1-r1)/(1+r1)
ciQ=ci95/sqrt(Q)
maxx=max(time(fm[[3]]))
minx=min(time(fm[[3]]))
maxy=max(fm[[3]],na.rm=TRUE)
miny=min(fm[[3]],na.rm=TRUE)
### Plot data
plot(window(fm[[3]], st, en), main=main.t, ylab="Anomaly (Deg C)")
lfit=lm(window(fm[[3]], st, en)~I(time(window(fm[[3]], st, en))))
### Add trendline and x-axis
# NOTE(review): abline(fm, ...) passes get.slope's plain 3-element list, from
# which abline() cannot extract an intercept/slope pair -- this call likely
# fails or draws nothing; abline(lfit...) just before already draws the
# trend line. Confirm the intent before removing.
abline(a=lfit[[1]][1],b=lfit[[1]][2], col=2); abline(fm, col=4, lwd=2)
grid()
### Annotate slope +/- AR(1)-corrected CI
tx=paste("Slope: ", round(fm[[1]][13]*10, 4)
,"Deg C/Decade", "+/-", round(ciQ, 4)
, "Deg C/Decade\n(corrected for AR(1) serial correlation)\nLag-1 value:"
, round(r1, 3))
text(x=ifelse(is.na(x.pos), (maxx-minx)*.2+minx,x.pos), y=ifelse(is.na(y.pos), (maxy-miny)*.8+miny, y.pos), tx, cex=.8, col=4, pos=4)
}
######## function plots monthly data with seasonal signal intact 12 slope curves
plt.seasonal.slopes=function(dat, st=1850, en=2011, y.pos=NA, x.pos=NA, main.t="Untitled")
{
### Plot a monthly ts with its seasonal signal intact and overlay the 12
### parallel trend lines from get.slope (one intercept per calendar month,
### shared slope). The annotation shows the decadal slope with a 95% CI
### adjusted for AR(1) autocorrelation, as in plt.avg2.
### Get trend
fm=get.slope(window(dat, st, en))
### Initialize variables
N=length(window(dat, st, en))
I=seq(1:N)/frequency(dat)  # regressor in years (arbitrary origin)
rmI=ssq(I-mean(I))
### Calculate sum of squared errors
SSE=ssq(fm[[2]][!is.na(fm[[2]])])/(N-2)
### Calculate OLS standard errors (x10 converts per-year to per-decade)
seOLS=sqrt(SSE/rmI)*10
### Calculate two-tailed 95% CI
ci95=seOLS*1.964
### Get lag-1 ACF of the residuals
r1=acf(fm [[2]], lag.max=1, plot=FALSE,na.action=na.pass)$acf[[2]]
### Calculate CIs with Quenouille (Santer) adjustment for autocorrelation
Q=(1-r1)/(1+r1)
ciQ=ci95/sqrt(Q)
maxx=max(time(dat))
minx=min(time(dat))
maxy=max(dat,na.rm=TRUE)
miny=min(dat,na.rm=TRUE)
### Plot data
plot(window(dat, st, en), main=main.t, ylab="Temperature (Deg C)")
### Add 12 trendlines (shared slope fm[[1]][13], per-month intercepts)
for(i in 1:12)
{
abline(a=fm[[1]][i],b=fm[[1]][13], col=2);
}
grid()
### Annotate text
tx=paste("Slope: ", round(fm[[1]][13]*10, 4),"C/Dec"
,"+/-", round(ciQ, 4)
,"C/Dec(AR(1)Lag-1 val:"
, round(r1, 3))
text(x=ifelse(is.na(x.pos), (maxx-minx)*.2+minx,x.pos), y=ifelse(is.na(y.pos), (maxy-miny)*.05+miny, y.pos), tx, cex=.8, col=4, pos=4)
}
# Demo: plot a typical combined series with its 12 seasonal trend lines.
# NOTE(review): nh is not created until the hemisphere-combination step much
# further down this script, so these lines fail on a clean top-to-bottom run;
# they look intended for interactive re-use after the script has completed.
dat=nh
plt.seasonal.slopes(dat,main.t="Typical Temperature Series")
#savePlot("c:/agw/plots/12 line seasonal sloope.jpg",type="jpg")
###########################################################
### combine station data, ensuring each duplicate series really belongs to the same station
###########################################################
getsinglestation=function(staid=60360, version=0)
{
### Fetch all GHCN duplicate series for one station id and version
### ("modifier") from the global ghmean table, convert each duplicate to a
### monthly ts in deg C, and average the duplicates month by month.
### Returns a single monthly ts (or NA if nothing usable is found).
staraw=NA
#raw data
smask= (ghmean[,2]==staid)  # rows for this station id
mask= ghmean[smask,3]==version  # restrict to the requested modifier
data=ghmean[smask,][mask,]
noser= levels(factor(data[,4]))  # distinct duplicate-series identifiers
for(j in noser)
{
mask2=data[,4]==j
startyear=min(data[mask2,5])
endyear=max(data[mask2,5])
subd=data[mask2,6:17]  # 12 monthly values per row (one row per year)
index=(data[mask2,5]-startyear)+1  # year offsets within the span
# month x year matrix; missing years stay NA
dat=array(NA,dim=c(12,(endyear-startyear)+1))
for(k in 1:length(index))
{
dat[,index[k]]=as.numeric(subd[k,])
}
dim(dat)=c(length(dat),1)  # flatten column-wise into one monthly column
dat[dat==-9999]=NA  # GHCN missing-value flag
dat=dat/10  # tenths of deg C -> deg C
rawd=ts(dat,start=startyear,deltat=1/12)
# sanity check: a series running to 2011 or later indicates a bad record;
# it is discarded
if(max(time(rawd))>=2011)
{
print ("error series")
rawd=NA
}
if(!is.ts(staraw))
{
staraw=rawd  # first usable duplicate
}else{
staraw=ts.union(staraw,rawd)  # align later duplicates on the time axis
}
}
# average duplicates month-by-month if more than one column survived
if(!is.null(ncol(staraw)))
{
allraw=ts(rowMeans(staraw,na.rm=TRUE),start=time(staraw)[1],freq=12)
}else{
allraw=staraw
}
allraw
}
##############################################################
############ Assemble stations into grid Jeff Id #############
##############################################################
##assign stations to gridcells
## Each station is mapped to a 5 x 5 degree cell: gridval[,1] is the latitude
## band (1..36, counted from -90) and gridval[,2] the longitude band
## (1..72, counted from -180). tinv columns 5/6 hold lat/lon.
gridval=array(NA,dim=c(stationnum,2))
gridval[,1]=as.integer((tinv[,5]+90)/5)+1 #lat
gridval[,2]=as.integer((tinv[,6]+180)/5)+1 #lon
#build gridded monthly series: 36 lat x 72 lon x (211 years * 12, 1800-2010)
gridtem=array(NA,dim=c(36,72,211*12))
for(ii in 1:72)
{
  for(jj in 1:36)
  {
    #stations falling in this cell
    maska=gridval[,2]==ii
    maskb=gridval[,1]==jj
    maskc= maska & maskb
    sta=NA ##initialize to non TS
    for(i in (1:stationnum)[maskc])
    {
      #get the averaged-duplicate series for this station ID / version
      rawsta = getsinglestation(tinv[i,2],tinv[i,3])
      #place all curves side by side in a ts structure
      if(!is.ts(sta))
      {
        sta=rawsta
      }else{
        ## BUG FIX: the original called ts.union(sta,rawsta) without
        ## assigning the result, so cells containing several stations
        ## silently kept only the first station's data.
        sta=ts.union(sta,rawsta)
      }
    }
    if(is.ts(sta))#if at least one time series exists
    {
      sta=window(sta,start=1800) #ignore pre-1800
      #month index into the 1800-based array (+.002 guards fp truncation)
      index=as.integer(((time(sta)-1800)*12+.002)+1)
      if(!is.null(ncol(sta))) #more than one station: offset-combine them
      {
        gridtem[jj,ii,index]=temp.combine(sta)$temps
      }else{
        gridtem[jj,ii,index]=sta
      }
      print(jj) #progress debug
      tit=paste("ii=",ii,"jj=",jj)
      plot(ts(gridtem[jj,ii,],start=1800,freq=12),main=tit) #debug
    }
  }
  print ("COLUMN") #progress debug
  print(ii) #progress debug
}
###########################################
## calculate global trends using roman's offset combine on each grid
###########################################
# Flatten the 36 x 72 x time grid into a time x cell ts matrix
# (2532 months = 211 years * 12); cell order is column-major (lat fastest).
rgl=gridtem
dim(rgl)=c(36*72,2532)
rgl=t(rgl)
rgl=ts(rgl,start=1800,deltat=1/12)
# keep only cells that contain any data at all
mask=!is.na(colMeans(rgl,na.rm=TRUE))
# latitude weights replicated across the 72 longitude bands
# NOTE(review): 'weight' is only created in the no-offset section near the
# bottom of this file, so that section must have run first -- the script is
# not runnable top-to-bottom in its current order.
wt=rep(weight,72)
# hemisphere mask: latitude bands 1..18 (from -90) = southern hemisphere
maskb=array(FALSE,dim=c(36,72))
maskb[1:18,]=TRUE
dim(maskb)=(36*72)  # flatten column-major to match rgl's cell ordering
maskc=mask & maskb
sh=temp.combine(rgl[,maskc],wt[maskc])$temps
maskc=mask & !maskb
nh=temp.combine(rgl[,maskc],wt[maskc])$temps
plot(nh,main="Northern Hemisphere Temperature",xlab="Year", ylab="Deg J")
#savePlot("c:/agw/plots/NH temp - roman h.jpg",type="jpg")
plot(sh,main="Southern Hemisphere Temperature",xlab="Year", ylab="Deg J")
#savePlot("c:/agw/plots/SH temp - roman h.jpg",type="jpg")
# global series: offset-combine the two hemisphere series
un=ts.union(nh,sh)
glt=temp.combine(un)$temps
plot(glt,main="Global Temperature",xlab="Year", ylab="Deg J")
#savePlot("c:/agw/plots/global temp - roman h2.jpg",type="jpg")
plt.seasonal.slopes(window(glt,start=1900),main.t="Global Temperature Trend")
#savePlot("c:/agw/plots/global temp trend- roman h2.jpg",type="jpg")
plt.avg2(window(glt,start=1900),main.t="Global Temperature Anomaly")
#savePlot("c:/agw/plots/global temp anomaly- roman h2.jpg",type="jpg")
# overlay HadCRUT for comparison (hadcrut must already be loaded elsewhere)
plt.avg2(window(glt,start=1900),main.t="Global Temperature Anomaly")
lines(hadcrut,col="red")
legend("bottomright",c("Ids","HadCru"),col=c(1,2),pch=10)
#savePlot("c:/agw/plots/id plus hadcrut.jpg",type="jpg")
# hemispheric anomaly plots, deseasonalized via get.slope ([[3]] = anomaly)
nha=get.slope(nh)
plot(window(nha[[3]],1900),main="Northern Hemisphere Anomaly",ylab="Anomaly Deg C",xlab="Year")
#savePlot("c:/agw/plots/northern hemisphere anom.jpg",type="jpg")
sha=get.slope(sh)
plot(window(sha[[3]],1900),main="Southern Hemisphere Anomaly",ylab="Anomaly Deg C",xlab="Year")
#savePlot("c:/agw/plots/Southern hemisphere anom.jpg",type="jpg")
gla= get.slope(glt)
plot(ff(window(nha[[3]],1978)),main="Hemispheric Temperature Anomaly",ylab="Anomaly Deg C",xlab="Year")
lines(ff(window(sha[[3]],1978)),col=2)
legend("bottomright",c("Northern","Southern"),col=c(1,2),pch=10)
#savePlot("c:/agw/plots/hemisphere anom1978 .jpg",type="jpg")
# NOTE(review): glava is the no-offset SOUTHERN-hemisphere average computed
# near the bottom of the script (forward reference), yet the legend below
# labels both curves as in the previous figure -- confirm what this plot was
# meant to compare.
plot(ff(window(nha[[3]],1978)),main="Hemispheric Temperature Anomaly",ylab="Anomaly Deg C",xlab="Year")
lines(ff(window(glava,1978)),col=2)
legend("bottomright",c("Northern","Southern"),col=c(1,2),pch=10)
#savePlot("c:/agw/plots/hemisphere anom1978 .jpg",type="jpg")
plt.avg2(ff(window(glt,1978)),main="Global Temperature Anomaly")
#savePlot("c:/agw/plots/Global anom1978 .jpg",type="jpg")
####################################
## global average without offset grid combinations
####################################
# Convert each gridcell series to a plain anomaly (the "standard" method,
# no offsets). calc.anom is not defined in this file -- presumably it comes
# from the sourced climateaudit utilities script; TODO confirm.
gta=array(NA,dim=c(36,72,211*12))
for(ii in 1:72)
{
for(jj in 1:36)
{
gta[jj,ii,]=calc.anom(gridtem[jj,ii,])
}
}
glav=rep(NA,211*12) #global ave
# latitude weights: sin of colatitude at the band centers (2.5, 7.5 ...
# 177.5 deg) -- proportional to cos(latitude), i.e. relative cell area
weight=sin(seq(2.5,180, 5)*(pi)/180) #cell weights
meanweight=mean(weight)
weight=weight/meanweight #normalize cell weights (mean 1)
# NOTE(review): 'weight' is also consumed by the offset-combine section
# above (wt=rep(weight,72)), so this section must execute before that one.
#weight=rep(1,36) # calculate unweighted temp #uncomment for unweighted version
mask=rep(FALSE,36) #lat mask (length 36, recycled over the lat dimension)
mask[1:18]=TRUE #lat mask to select the southern hemisphere bands
for(kk in 1: (211*12)) #hemisphere 1 (southern)
{
mm=rep(NA,72)
for(ii in 1:72)
{
# mean over the 18 southern latitude bands of this longitude column
# NOTE(review): with na.rm=TRUE this mean of x*w is not a true weighted
# mean when some cells are missing (weights of missing cells are dropped
# from the numerator but the divisor is just the count)
mm[ii]=mean(gta[mask,ii,kk]*weight[1:18],na.rm=TRUE)
}
glav[kk]=mean(mm,na.rm=TRUE) #then average across longitudes
}
glava=ts(glav,start=1800,deltat=1/12) #southern hemisphere average
mask=!mask #select other hemisphere
for(kk in 1:(211*12))
{
mm=rep(NA,72)
for(ii in 1:72)
{
mm[ii]=mean(gta[mask,ii,kk]*weight[19:36],na.rm=TRUE)
}
glav[kk]=mean(mm,na.rm=TRUE)
}
glavb=ts(glav,start=1800,deltat=1/12) #north hemisphere average
glav=(glava+glavb)/2 #global average (equal hemisphere weighting)