%macro TreeGatekeeper(dataset,outdata); /* Computes adjusted p-values for the Bonferroni-based tree gatekeeping procedure. Written by Alex Dmitrienko. Inputs: DATASET = Data set defining the structure of the problem, including the family definition, raw p-values and weights, serial and parallel rejection sets. OUTDATA = Output data set with adjusted p-values. */ proc iml; use &dataset; read all var {family} into family; read all var {weight} into weight; read all var {raw_p} into raw_p; read all var {parallel} into par; read all var {serial} into ser; family=t(family); weight=t(weight); raw_p=t(raw_p); nhyps=ncol(family); nfams=family[ncol(family)]; nints=2**nhyps-1; h=j(nints,nhyps,0); parmat=j(nhyps,nhyps,0); sermat=j(nhyps,nhyps,0); do i=1 to nhyps; do j=1 to nhyps; * Parallel gatekeeping sets; parmat[i,j]=(substr(par[i,],j,1)="1"); * Serial gatekeeping sets; sermat[i,j]=(substr(ser[i,],j,1)="1"); end; end; do i=1 to nhyps; do j=0 to nints-1; k=floor(j/2**(nhyps-i)); if k/2=floor(k/2) then h[j+1,i]=1; end; end; v=j(nints,nhyps,0); hyp=j(nints,nhyps,0); adjp=j(nhyps,1,0); tempv=j(1,nhyps,0); * Indicators for parallel and serial gatekeeping sets; pind=j(1,nhyps,0); sind=j(1,nhyps,0); start bonf(p,w); bonfp=min(p[loc(w)]/w[loc(w)]); return(bonfp); finish bonf; do i=1 to nints; r=1; do j=1 to nfams; window=(family=j); * First family is primary; if j=1 then do; * Weights for the current family; tempv=r#window#weight#h[i,]; * How much weight is left?; r=r-sum(tempv); v[i,]=v[i,]+tempv; end; * Secondary families; if 10 then pind[k]=(family[k]=j)*(sum(parmat[k,])^=sum(parmat[k,]#h[i,])); if sum(parmat[k,])=0 then pind[k]=(family[k]=j); sind[k]=(family[k]=j)*(sum(sermat[k,]#h[i,])=0); end; * Weights for the current family; tempv=r#window#pind#sind#weight#h[i,]; * How much weight is left?; r=r-sum(tempv); v[i,]=v[i,]+tempv; *print j sind pind tempv; end; * The last family; if j=nfams then do; * Account for parallel and serial gatekeeping sets; do k=1 to nhyps; if sum(parmat[k,])>0 then pind[k]=(family[k]=j)*(sum(parmat[k,])^=sum(parmat[k,]#h[i,])); if sum(parmat[k,])=0 then pind[k]=(family[k]=j); sind[k]=(family[k]=j)*(sum(sermat[k,]#h[i,])=0); end; sumw=sum(window#pind#sind#weight#h[i,]); if sumw>0 then do; tempv=r#window#pind#sind#weight#h[i,]/sumw; v[i,]=v[i,]+tempv; end; *print j sind pind tempv; end; end; hyp[i,]=h[i,]*bonf(raw_p,v[i,]); end; vnum=j(nints,nhyps+1,0); hypnum=j(nints,nhyps+1,0); vnum[,2:nhyps+1]=v; vnum[,1]=t(1:nints); hypnum[,2:nhyps+1]=hyp; hypnum[,1]=t(1:nints); do i=1 to nhyps; adjp[i]=max(hyp[,i]); end; create adjp from adjp[colname={adjp}]; append from adjp; quit; data &outdata; merge &dataset adjp; format adjp 6.4; run; %mend TreeGatekeeper; * Example (study with three families of hypotheses and three sequences); data study; input hyp $ family weight raw_p parallel $ serial $; datalines; H11 1 0.5 0.010 000000 000000 H12 1 0.5 0.005 000000 000000 H21 2 0.5 0.030 000000 100000 H22 2 0.5 0.015 000000 010000 H31 3 0.5 0.035 000000 101000 H32 3 0.5 0.010 000000 010100 ; %TreeGatekeeper(dataset=study,outdata=out); proc print data=out; title "Bonferroni-based tree gatekeeping procedure"; run;