version 5.00 2 frmOutput frmgraph frmMain 3 6 1 0 0 3 0 0 6 0 0 1 0 0 0 0 0 0 0 0 238 268 0 0 0 Sub designer addform(frmOutput,"Results","",220,220,220)@ addbutton(frmoutput,btnGraph,155,235,75,23,"GRAPH",212,208,200,0,0,0,True,True,9)@ addtextbox(frmoutput,txtbxPsiLog,5,5,230,220,"TextBox6",255,255,255,0,0,0,True,True,True,9)@ addform(frmgraph,"Graph","",220,220,220)@ addbutton(frmgraph,btnINPUT,20,235,60,23,"INPUT",212,208,200,0,0,0,True,True,9)@ addbutton(frmgraph,btnBACK,85,235,60,23,"BACK",212,208,200,0,0,0,True,True,9)@ addbutton(frmgraph,btnEXIT,150,235,60,23,"EXIT",212,208,200,0,0,0,True,True,9)@ addform(frmMain,"LogReg","",211,211,211)@ addbutton(frmmain,btnExitMain,150,125,75,20,"EXIT",212,208,200,0,0,0,True,True,9)@ addbutton(frmmain,btnRUN,150,95,75,23,"RUN",212,208,200,0,0,0,True,True,9)@ addtextbox(frmmain,txtbxData,10,95,95,165,"TextBox5",255,255,255,0,0,0,True,True,True,9)@ addcheckbox(frmmain,chkbxGrouped,5,70,190,25,"Check if summary data",211,211,211,0,0,0,True,True,False,9)@ addtextbox(frmmain,txtbxNR,190,40,40,22,"",255,255,255,0,0,0,True,True,False,9)@ addlabel(frmmain,lblNR,5,40,175,25,"Number of predictor variables",211,211,211,0,0,0,True,True,9)@ addtextbox(frmmain,txtbxNC,190,10,40,22,"",255,255,255,0,0,0,True,True,False,9)@ addlabel(frmmain,lblNC,5,10,100,20,"Number of cases",211,211,211,0,0,0,True,True,9)@ End Sub @EndOfDesignText@'TO DO LIST 'Norm functie 'clear variables before next run 'graph implementation 'spaces removal in data 'return when stringlength < 0 Sub Globals nC =10 '// 10 cases nR = 1 '// independent variable nP = 1: nP1 = 1 CheckGrouped = 0 MaxX = 0: MinX = 0 d = 0 sY0 = 0: sY1 = 0: sC = 0 'DATAString = "0,0,0" & CRLF & "0,1,0" & CRLF & "1,1,1" & CRLF & "1,1,0" & CRLF & "0,1,0" & CRLF & "0,1,0" & CRLF & "0,1,1" & CRLF&"0,0,0"& CRLF & "0,1,0" & CRLF & "0,0,0" DATAString = "1,0" & CRLF & "2,0" & CRLF & "3,0" & CRLF & "4,0" & CRLF & "5,1" & CRLF & "6,0" & CRLF & "7,1" & CRLF&"8,0"& CRLF & "9,1" & CRLF & "10,1" D$ = "": S$ = "" dd=0 s=0 LLn = 0 v = "" vv = 0 DIM X(0) DIM xM(0) DIM xSD(0) DIM Y0(0) DIM Y1(0) DIM Par(0) DIM SEP(0) DIM Arr(0) DIM Graph(0) End Sub '****************************** Sub App_Start SIP(false) Init frmMain.show DataEditor End Sub '****************************** Sub btnRUN_Click frmOutput.Show PsiLog End Sub '****************************** Sub btnGraph_Click GraphXY frmGraph.Show End Sub '****************************** Sub btnBACK_Click frmOutput.Show End Sub '****************************** Sub btnINPUT_Click frmMain.Show End Sub '****************************** Sub btnEXIT_Click AppClose End Sub '****************************** Sub btnExitMain_Click AppClose End Sub '****************************** Sub Init txtbxNC.text = NC '// testdata has 10 cases txtbxNR.text = NR '// with one independent variable txtbxData.Text = Datastring txtbxNC.Focus End Sub '****************************** Sub DataEditor v = "" nC = txtbxNC.Text nR = txtbxNR.Text nP = nR+1 nP1 = nP+1 'chkbxGrouped.Checked = FALSE IF chkbxGrouped.Checked = TRUE then CheckGrouped = 1 ELSE CheckGrouped = 0 END IF ' // make copy of buffer in buffercopy D$ = DataString FOR i = 0 to StrLength(D$)-1 k = StrAt(D$,i) IF k = cTAB then StrReplace(D$,cTAB,",") 'Make Comma ELSE IF NOT(((ASC(k) >= 48) AND (ASC(k) <= 57)) OR (ASC(k)=44)) '{empty} AND NOT in 0...9 StrReplace(D$,k,CRLF) END IF NEXT 'Depending on the operating system (PC, Mac, Unix), multi-line text can use a carriage return (Mac), linefeed (UNIX), or carriage-return-and-linefeed (PC) to delimit the end of a line, so I look to see what's going on, and set the NL variable accordingly: 'if( da.indexOf(NL)==-1 ) { if( da.indexOf(CR)>-1 ) { NL = CR } else { NL = LF } } 'if string isn't closed by CRLF, close with CRLF 'update length string k = StrAt(D$,StrLength-1) IF k <> CRLF then D$ = D$ & CRLF END IF CreatePointers '// set Max very low and Min very high MaxX = -2E10 MinX = 2E10 sY0 = 0 sY1 = 0 sC = 0 'This loop parses one row of data: ' 'for (i = 0; i "" then 'check if datarow <> empty v = "" 'substring is empty 'v = read left of CRLF v = SubString(D$,0, StrIndexOf(D$,CHR(13),0)) 'take v out of copy D$ = StrRemove(D$,0,StrIndexOf(D$,CHR(13),0)+2) '2 extra places for CR & LF 'next loop gets each variable from the row of data FOR j = 1 to nR 'repeat reading data until number of IVs d = SubString(v,0,StrIndexOf(v,",",0)) 'what is value left of comma? IF d > MaxX then MaxX = d 'set MaxX IF d < MinX then MinX = d 'set MinX X(i*(nR+1)+j) = d v = SubString(v,StrIndexOf(v,",",0)+1,StrLength(v)-StrIndexOf(v,",",0)) NEXT 'count cases 1 and 0 IF ChkbxGrouped.Checked = TRUE then 'for zeros d = SubString(v,0,StrIndexOf(v,",",0)) 'what is value left of comma for zeros IF d > MaxX then MaxX = d 'set MaxX IF d < MinX then MinX = d 'set MinX Y0 (i) = d sY0 = sY0+d v = SubString(v,StrIndexOf(v,",",0)+1,1) 'reset dd$, right of comma d = SubString(v,0,StrIndexOf(v,",",0)) 'what is value left of comma for zeros 'for ones 'what is value left for ones Y1(i) = d sY1 = sY1+d ELSE IF IsNumber(v) then Select v Case 0 Y0(i) = 1 sY0 = sY0+1 Case 1 Y1(i) = 1 sY1 = sY1+1 End Select END IF END IF sC = sC+(Y0(i)+Y1(i)) END IF '// end check if string <> empty '// calculate sums for mean and SD FOR j = 1 to nR xx = X(i*(nR+1)+j) xM(j) = xM(j)+(Y0(i)+Y1(i))*xx xSD(j) = xSD(j)+(Y0(i)+Y1(i))*xx*xx NEXT NEXT '// end READ DATA' ' // set MinD and MaxD on x-axis graph ' MinD% = 15 ' MaxD% = 140 ' // compute scaling X-value on graph ' Slope = (MaxD%-MinD%)/(MaxX-MinX) ' Intercept = MinD%-Slope*MinX ' GraphXY: ' // draw probability line ' GraphProb: ' DestroyPointers: ' ENDIF // Continue End Sub '****************************** Sub PsiLOG txtbxPsiLog.Text = "" LLn = 0 s = 1 vv = 0 '// initialise OutputString S$ = "" 'print out summaries IF CheckGrouped <> 0 then S$ = S$ & "Descriptives...data grouped." & CRLF ELSE S$ = S$ & "Descriptives...data ungrouped." & CRLF END IF S$ = S$ & CRLF S$ = S$ & FORMAT(sY0,"N0") & " cases have Y=0" & CRLF S$ = S$ & FORMAT(sY1,"N0") & " cases have Y=1" & CRLF S$ = S$ & CRLF S$ = S$ & "Var. Mean SD " & CRLF FOR j = 1 to nR xM(j) = xM(j)/sC xSD(j) = xSD(j)/sC xSD(j) = SQRT(ABS(xSD(j)-xM(j)*xM(j))) S$ = S$ & FORMAT(j,"N0") & " " & FORMAT(xM(j),"N4") & " " & FORMAT(xSD(j),"N4") NEXT xM(0) = 0 'apparently useless action? xSD(0) = 1 'apparently useless action? 'scaling the predictor variables FOR i = 0 to nC-1 FOR j = 1 to nR X(i*(nR+1)+j) = (X(i*(nR+1)+j)-xM(j))/xSD(j) NEXT NEXT S$ = S$ & CRLF & CRLF S$ = S$ & "Iteration History..." & CRLF 'S$ = S$ & "-2 * LogLikelihood and Chi²..." 'Intercept will probably be close to LN of ratio numbers '1 to 0 Par(0) = LN(sY1/sY0) FOR j = 1 to nR Par(j) = 0 NEXT 'set LLp initially unusally high LnV = 0 Ln1mv = 0 LLp = 2E10 LL = 1E10 'Main Iteration loop DO WHILE (ABS(LLp-LL) > 0.00001) 'Main Loop LLp = LL LL = 0 FOR j = 0 to Nr FOR k = j to nR+1 ARR(J*(Nr+2)+K) = 0 NEXT NEXT 'for each case, evaluate the predicted probability of getting 'the outcome, using the current guesses to the parameters FOR i = 0 to nC-1 'CASE vv = Par(0) FOR j = 1 to nR vv = vv + Par(j) * X(i*(nR+1)+j) NEXT IF (vv > 15) then LnV = -cE^(-vv) Ln1mv = -vv q = cE^(-vv) vv = cE^(LnV) ELSE IF (vv <= 15) then LnV = vv Ln1mv = -cE^(vv) q = cE(vv) vv = cE^(LnV) ELSE vv = 1/(1+cE^(-vv)) LnV = Ln(vv) Ln1mv = Ln(1-vv) q = vv*(1-vv) END IF END IF LL = LL - 2 * Y1 (i) * LnV - 2 * Y0 (i) * Ln1mv 'Build up the sums of the crossproducts FOR j = 0 to nR xij = X(i*(nR+1)+j) Arr(j*(nR+2)+nR+1) = Arr(j*(nR+2)+nR+1)+xij*(Y1(i)*(1-v)+Y0(i)*(-v)) FOR k = j to nR Arr(j*(nR+2)+k) = Arr(j*(nR+2)+k)+xij*X(i*(nR+1)+k)*q*(Y0(i)+Y1 (i)) NEXT NEXT NEXT 'CASE IF (LLp = 1E10) then LLn = LL S$ = S$ & "-2LL (NULL) = " & FORMAT(A(LL),"N4") & CRLF ELSE IF (ABS(LLp-LL) <= 0.00001) S$ = S$ & "-2LL (Conv.) = " & FORMAT(A(LL), "N4") & CRLF '// ELSE '// S$(s%) = "-2LL = "+FIX$(A:(LL),3,7) END IF 'The simultaneous matrix is symmetrical. We just calculated 'one half of it, so now we set the other half equal to its 'transpose FOR j = 1 to nR k = 0 FOR k = 0 to j-1 Arr(j*(nR+2)+k) = Arr(k*(nR+2)+j) NEXT NEXT 'Worlds shortest matrix inverter / simultaneous solver FOR i = 0 to nR 'MATRIX S = Arr(i*(nR+2)+i) Arr (i*(nR+2)+i) = 1 FOR k = 0 to nR+1 Arr(i*(nR+2)+k) = Arr(i*(nR+2)+k)/S NEXT FOR j = 0 to nR IF (i <> j) then S = Arr(j*(nR+2)+i) Arr(j*(nR+2)+i) = 0 FOR k = 0 to nR+1 Arr(j*(nR+2)+k) = Arr(j*(nR+2)+k)-s*Arr(i*(nR+2)+k) NEXT END IF NEXT NEXT 'END MATRIX 'Newton's method produces "increments" to the 'parameters, so let's increment them now FOR j = 0 to nR Par(j) = Par(j)+Arr(j*(nR+2)+nR+1) NEXT LOOP 'Main Loop ' GOTO E1CONT:: ' E1:: ' ONERR OFF ' gIPRINT "error calculating LogLikelihood..." ' E1CONT:: 'Once the iterations have converged to 5 decimals '(for LL), we arrive here CSq = (LLn-LL) S$ = S$ & CRLF S$ = S$ & "Overall Model Fit....(Gm)" & CRLF S$ = s$ & "ChiSq" & cTAB & " = " & FORMAT(A(CSq),"N4") & CRLF S$ = S$ & "df" & cTAB & " = " & FORMAT(nR,"N0") & CRLF S$ = S$ & "p" & cTAB & " = " & FORMAT(ChiSq(CSq,nR),"N4") & CRLF 'Reverse scaling on the resulting regression coefficients 'and their standard errors S$ = S$ & CRLF S$ = S$ & "Coefficients and Standard Errors..." & CRLF S$ = S$ & "Var B SE p" & CRLF 'Wald = & FORMAT( (Par (j)/SEP (j))^2,"N4" ) & " " FOR j = 1 to nR Par (j) = Par(j)/xSD(j) SEP (j) = SQRT(Arr(j*(nP+1)+j)) / xSD(j) Par (0) = Par(0) - Par(j) * xM(j) S$ = S$ & FORMAT(j,"N0") & " " & FORMAT(Par(j),"N4") & " " & FORMAT(A(SEP(j)),"N4") & " " & FORMAT(Norm(ABS(Par(j)/A(SEP (j)))),"N4") & CRLF NEXT S$ = S$ & "Intercept" & " " & FORMAT(Par (0),"N4") & CRLF S$ = S$ & CRLF S$ = S$ & "Odds Ratios and 95% Confidence Intervals..." & CRLF S$ = S$ & "Var O.R. Low High" & CRLF S$ = S$ & CRLF 'Odds ratios are just the exponentiated regresssion coefficients FOR j = 1 to nR ORc = cE^(Par(j)) ORl = cE^(Par(j) - 1.96 * SEP(j)) ORh = cE^(Par(j) + 1.96 * SEP(j)) S$ = S$ & FORMAT(j,"N0") & " " & FORMAT(ORc,"N4") & " " & FORMAT(ORl,"N4") & " " & FORMAT(ORh,"N4") & CRLF NEXT S$ = S$ & CRLF 'Now let's show the observed and calculted outcomes for each data point FOR j = 1 to nR S$ = S$ & "X" & FORMAT(j,"N0") & " " NEXT IF CheckGrouped <> 0 then S$ = S$ & "n0 n1 Calc Prob" & CRLF ELSE S$ = S$ & "Y Calc Prob" & CRLF END IF FOR i = 0 to nC-1 v = Par (0) FOR j = 1 to nR xx = xM(j)+ (xSD(j) * X(i*(nR+1)+j) ) v = v + Par(j)*xx S$ = S$ & FORMAT(xx,"N0") & " " NEXT v = 1 / (1+cE^(-v)) IF CheckGrouped <> 0 then S$ = S$ & FORMAT(Y0 (i),"N0") & " " & FORMAT(Y1 (i),"N0") & " " & FORMAT(v,"N4") & CRLF ELSE S$ = S$ & FORMAT(Y1 (i),"N0") & " " & FORMAT(v,"N4") & CRLF END IF NEXT txtbxPsiLog.Text = S$ End Sub '****************************** Sub Norm(zz) qq = zz*zz IF (ABS(zz) > 7 ) then Return (1-1/qq+3/(qq*qq))*cE^(-qq/2)/ABS(zz)*SQRT(cPi/2) ELSE Return ChiSq(qq,1) END IF End Sub '****************************** Sub ChiSq(xx,nn) IF (xx > 1000 OR nn > 1000) then qq = NormDistr(((xx/nn)^(1/3)+2/(9*nn)-1)/Sqrt(2/(9*nn))) /2 IF (xx > nn) then return qq ELSE return 1-qq END IF END IF pp = cE^(-0.5*xx) IF (nn MOD 2)= 1 then pp = pp*SQRT(2*xx/cPi) END IF kk = nn DO WHILE (kk >= 2) pp = pp*xx/kk kk = kk-2 LOOP tt = pp aa = nn DO WHILE (tt > 1e-15*pp) aa = aa+2 tt = tt*xx/aa pp = pp+tt LOOP Return 1-pp End Sub '****************************** Sub NormDistr(sd) pp = -2,72792e-10*(sd^10)+1,21975e-05*(sd^9)+1,93187e-08*(sd^8)-0,000463167*(sd^7)-3,079e-07*(sd^6)+0,007191084*(sd^5)+1,61915e-06*(sd^4)-0,061668162*(sd^3)-2,58203E-06*(sd^2)+0,396491592*sd+0,500000587 pp = pp*2 Return pp End Sub '****************************** Sub A(pp) xx = pp IF (pp>=0) then xx = xx+0.00005 ELSE xx = xx-0.00005 END IF RETURN xx End Sub '****************************** Sub CreatePointers DIM X(nC*(nR+1)) FOR i = 0 to ArrayLen(X())-1 X (i) = 0 NEXT DIM Y0(nC) FOR i = 0 to ArrayLen(Y0())-1 Y0 (i) = 0 NEXT DIM Y1(nC) FOR i = 0 to ArrayLen(Y1())-1 Y1 (i) = 0 NEXT DIM xM(nR+1) FOR i = 0 to ArrayLen(xM())-1 xM (i) = 0 NEXT DIM xSD(nR+1) FOR i = 0 to ArrayLen(xSD())-1 xSD (i) = 0 NEXT DIM Par(nP) FOR i = 0 to ArrayLen(Par())-1 Par (i) = 0 NEXT DIM SEP(nP) FOR i = 0 to ArrayLen(SEP())-1 SEP (i) = 0 NEXT DIM Arr(nP*nP1) FOR i = 0 to ArrayLen(Arr())-1 Arr(i) = 0 NEXT End Sub '****************************** Sub GraphXY 'local y0%,t%,Graph ,x,y,xx,yy 'initialise GraphPointer to allocate number of ones and zeros 'for every value of independent variable 'DIM Graph ((MaxX-MinX+1+1)*(nR+1)) 'FOR i = 0 to (MaxX-MinX+1+1)*(nR+1) 'Graph (i*(nR+1)+0) = 1 'initial size-1 graphdot value ZERO 'Graph (i*(nR+1)+1) = 1 'initial size-1 graphdot value ONE ' NEXT FOR i = 0 to nC-1 ' j% = 1 ' WHILE (j% <= nR%) ' xx = PEEKF( X +(i%*(nR%+1)+j%)*FSize) ' j%++ ' ENDWH ' ' x = 15+INT(xx*Slope+Intercept) ' IF ChkGrouped% <> 0 then ' y = PEEKF (Y1 +i%*FSize) ' IF y <> 0 then ' y0% = INT(y) ' y = YY%+15-INT(YY%) // at one level ' t%=1 ' WHILE (t% <= y0%) ' gAT INT(x),INT(y) ' gCircle (1+t%) ' t%++ ' ENDWH ' ENDIF ' ' y = PEEKF (Y0 +i%*FSize) ' IF y <> 0 then ' y0% = INT(y) ' y = YY%+15 // at zero level ' t%=1 ' WHILE (t% <= y0%) ' gAT INT(x),INT(y) ' gCircle (1+t%) ' t%++ ' ENDWH ' END IF ' ELSE IF (ChkGrouped = 0) then ' IF PEEKF (Y0 +i%*FSize) = 1 then ' yy = 0 ' POKEF Graph +(INT(xx)*(nR%+1)+0)*FSize, PEEKF(Graph +(INT(xx)*(nR%+1)+0)*FSize)+1 ' IF PEEKF(Graph +(INT(xx)*(nR%+1)+0)*FSize) >= 5 ' POKEF Graph +(INT(xx)*(nR%+1)+0)*FSize, 5 ' ENDIF ' ELSE ' yy = 1 ' POKEF Graph +(INT(xx)*(nR%+1)+1)*FSize, PEEKF(Graph +(INT(xx)*(nR%+1)+1)*FSize)+1 ' IF PEEKF(Graph +(INT(xx)*(nR%+1)+1)*FSize) >= 5 then ' POKEF Graph +(INT(xx)*(nR%+1)+1)*FSize, 5 ' END IF ' END IF ' y = YY%+15-INT(YY%*yy) ' gAT INT(x),INT(y) ' IF yy = 0 : gCircle (PEEKF(Graph +(INT(xx)*(nR%+1)+0)*FSize)),1 : ENDIF ' IF yy = 1 : gCircle (PEEKF(Graph +(INT(xx)*(nR%+1)+1)*FSize)),1 : ENDIF ' END IF NEXT ' FREEALLOC Graph End Sub '****************************** ' ' ' ' Calculate sums that will be used to calculate the mean and standard deviation of each predictor. I do this because of the one "trick" that I use in this program to help ensure convergence of the iterative procedure. I recode all predictor variables by subtracting the mean and dividing by the standard deviation of that variable. So, I'm "centering" and "scaling" the variables, so that they will have a mean of zero and sd of one. This makes the simultaneous equations much better "conditioned", and greatly stabilizes the iterative process, especially if your original data had variables that were tightly clustered around some number that was far away from zero. For example, if one variable was year of observation, that had values ranging from 1997 to 2001, and another variable was pH, with values ranging from 7.2 to 7.5, a simple implementation might have a lot of trouble with the iteration procedure. But when I transform them, things get a lot more stable. Trust me on this one. ' ' for (j = 1; j<=nR; j++) { ' x = X[ix(i,j,nR+1)]; ' xM[j] = xM[j] + (Y0[i] + Y1[i])*x; ' xSD[j] = xSD[j] + (Y0[i] + Y1[i])*x*x; ' } ' } ' ' Now I'm done reading in all the data, so I print out some summaries. The variable "o" will contain the entire output, as one huge character string. I just keep concatenating more and more text onto it. Each NL character results in a new line being displayed when the o string is rendered on the screen: ' ' var o = "Descriptives..." + NL; ' ' o = o + ( NL + sY0 + " cases have Y=0; " + sY1 + " cases have Y=1." + NL ); ' ' o = o + ( NL + " Variable Avg SD " + NL ); ' for (j = 1; j<=nR; j++) { ' xM[j] = xM[j] / sC; ' xSD[j] = xSD[ j] / sC; ' xSD[j] = Sqrt( Abs( xSD[j] - xM[j] * xM[j] ) ) ' o = o + ( " " + Fmt3(j) + " " + Fmt(xM[j]) + Fmt(xSD[j])+ NL ); ' } ' xM[0] = 0; xSD[0] = 1; ' ' Here's where I actually get around to scaling those predictor variables: ' ' for (i = 0; ii,j,nR+1)] - xM[j] ) / xSD[j]; ' } ' } ' ' Now we're ready to actually do the iterations to get the regression coefficients: ' ' o = o + ( NL + "Iteration History..." ); ' form.output.value = o; ' ' Because of the variable scaling, the regression coefficients will probably wind up being within the range -5 to +5, so it's a pretty good guess to set them to zero. The intercept will probably be close to the log of the ratio of the number of 1's to 0's for outcomes. ' ' Par[0] = Ln( sY1 / sY0 ); ' for (j = 1; j<=nR; j++) { ' Par[j] = 0; ' } ' ' We're trying to minimize the log-likelihood, so let's start if off with a huge number. (LLp will be the LL from the "previous" iteration cycle. ' var LLp = 2e+10; ' var LL = 1e+10; ' ' Here's the main iteration loop. Iterate until the LL's converge to five decimals: ' ' while( Abs(LLp-LL)>0.00001 ) { ' LLp = LL; ' LL = 0; ' ' Zero out the simultaneous equations matrix: ' ' for (j = 0; j <=nR; j++) { ' for (k = j; k<=nR+1; k++) { ' Arr[ix(j,k,nR+2)] = 0; ' } ' } ' ' For each case: ' ' for (i = 0; i