!*********************** Contents **************************************** !* Sample driver program, VAX-11 Fortran; ********************************** !* HC: O(n^2) time, O(n^2) space hierarchical clustering, Fortran 77 ******* !* HCASS: determine cluster-memberships, Fortran 77. *********************** !* HCDEN: draw upper part of dendrogram, VAX-11 Fortran. ******************* !* Sample data set: last 36 lines. ***************************************** !*************************************************************************** ! REAL DATA(18,16),CRIT(18),MEMBR(18) ! REAL CRITVAL(9) ! INTEGER IA(18),IB(18) ! INTEGER ICLASS(18,9),HVALS(9) ! INTEGER IORDER(9),HEIGHT(9) ! DIMENSION NN(18),DISNN(18) ! REAL D(153) ! LOGICAL FLAG(18) ! IN ABOVE, 18=N, 16=M, 9=LEV, 153=N(N-1)/2. ! ! ! OPEN(UNIT=21,STATUS='OLD',FILE='SPECTR.DAT') ! ! ! N = 18 ! M = 16 ! DO I=1,N ! READ(21,100)(DATA(I,J),J=1,M) ! ENDDO ! 100 FORMAT(8F7.1) ! ! ! LEN = (N*(N-1))/2 ! IOPT=1 ! CALL HC(N,M,LEN,IOPT,DATA,IA,IB,CRIT,MEMBR,NN,DISNN,FLAG,D) ! ! ! LEV = 9 ! CALL HCASS(N,IA,IB,CRIT,LEV,ICLASS,HVALS,IORDER,CRITVAL,HEIGHT) ! ! ! CALL HCDEN(LEV,IORDER,HEIGHT,CRITVAL) ! ! ! END !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++C ! C ! HIERARCHICAL CLUSTERING using (user-specified) criterion. C ! C ! Parameters: C ! C !removed DATA(N,M) input data matrix, C ! DISS(LEN) dissimilarities in lower half diagonal C ! storage; LEN = N.N-1/2, C ! IOPT clustering criterion to be used, C ! IA, IB, CRIT history of agglomerations; dimensions C ! N, first N-1 locations only used, C ! MEMBR, NN, DISNN vectors of length N, used to store C ! cluster cardinalities, current nearest C ! neighbour, and the dissimilarity assoc. C ! with the latter. C ! FLAG boolean indicator of agglomerable obj./ C ! clusters. C ! C ! F. Murtagh, ESA/ESO/STECF, Garching, February 1986. C ! C !------------------------------------------------------------C module hc_ !----------------------------------------------------------------------------- use io_units use names use clust_data implicit none !----------------------------------------------------------------------------- ! ! !----------------------------------------------------------------------------- contains !----------------------------------------------------------------------------- ! hc.f !----------------------------------------------------------------------------- SUBROUTINE HC(N,M,LEN,IOPT,IA,IB,CRIT,MEMBR,NN,DISNN,& FLAG,DISS) integer :: N,M,LEN,IOPT REAL(kind=4) :: MEMBR(N) REAL(kind=4) :: DISS(LEN) INTEGER :: IA(N),IB(N) REAL(kind=4) :: CRIT(N) integer,DIMENSION(N) :: NN real(kind=4),dimension(N) ::DISNN LOGICAL :: FLAG(N) REAL(kind=4) INF DATA INF /1.E+20/ integer :: I,J,NCL,IND,IM,JM,I2,J2,K,IND1,IND2,IND3,JJ real(kind=8) :: DMIN,X,XX ! ! Initializations ! DO I=1,N MEMBR(I)=1. FLAG(I)=.TRUE. ENDDO NCL=N ! ! Construct dissimilarity matrix ! DO I=1,N-1 DO J=I+1,N IND=IOFFSET(N,I,J) !input DISS(IND)=0. !input DO K=1,M !input DISS(IND)=DISS(IND)+(DATA(I,K)-DATA(J,K))**2 !input ENDDO IF (IOPT.EQ.1) DISS(IND)=DISS(IND)/2. ! (Above is done for the case of the min. var. method ! where merging criteria are defined in terms of variances ! rather than distances.) ENDDO ENDDO ! ! Carry out an agglomeration - first create list of NNs ! DO I=1,N-1 DMIN=INF DO J=I+1,N IND=IOFFSET(N,I,J) IF (DISS(IND).GE.DMIN) GOTO 500 DMIN=DISS(IND) JM=J 500 CONTINUE ENDDO NN(I)=JM DISNN(I)=DMIN ENDDO ! 400 CONTINUE ! Next, determine least diss. using list of NNs DMIN=INF DO I=1,N-1 IF (.NOT.FLAG(I)) GOTO 600 IF (DISNN(I).GE.DMIN) GOTO 600 DMIN=DISNN(I) IM=I JM=NN(I) 600 CONTINUE ENDDO NCL=NCL-1 ! ! This allows an agglomeration to be carried out. ! I2=MIN0(IM,JM) J2=MAX0(IM,JM) IA(N-NCL)=I2 IB(N-NCL)=J2 CRIT(N-NCL)=DMIN ! ! Update dissimilarities from new cluster. ! FLAG(J2)=.FALSE. DMIN=INF DO K=1,N IF (.NOT.FLAG(K)) GOTO 800 IF (K.EQ.I2) GOTO 800 X=MEMBR(I2)+MEMBR(J2)+MEMBR(K) IF (I2.LT.K) THEN IND1=IOFFSET(N,I2,K) ELSE IND1=IOFFSET(N,K,I2) ENDIF IF (J2.LT.K) THEN IND2=IOFFSET(N,J2,K) ELSE IND2=IOFFSET(N,K,J2) ENDIF IND3=IOFFSET(N,I2,J2) XX=DISS(IND3) ! ! WARD'S MINIMUM VARIANCE METHOD - IOPT=1. ! IF (IOPT.EQ.1) THEN DISS(IND1)=(MEMBR(I2)+MEMBR(K))*DISS(IND1)+ & (MEMBR(J2)+MEMBR(K))*DISS(IND2)- & MEMBR(K)*XX DISS(IND1)=DISS(IND1)/X ENDIF ! ! SINGLE LINK METHOD - IOPT=2. ! IF (IOPT.EQ.2) THEN DISS(IND1)=MIN(DISS(IND1),DISS(IND2)) ENDIF ! ! COMPLETE LINK METHOD - IOPT=3. ! IF (IOPT.EQ.3) THEN DISS(IND1)=MAX(DISS(IND1),DISS(IND2)) ENDIF ! ! AVERAGE LINK (OR GROUP AVERAGE) METHOD - IOPT=4. ! IF (IOPT.EQ.4) THEN DISS(IND1)=(MEMBR(I2)*DISS(IND1)+MEMBR(J2)*DISS(IND2))/ & (MEMBR(I2)+MEMBR(J2)) ENDIF ! ! MCQUITTY'S METHOD - IOPT=5. ! IF (IOPT.EQ.5) THEN DISS(IND1)=0.5*DISS(IND1)+0.5*DISS(IND2) ENDIF ! ! MEDIAN (GOWER'S) METHOD - IOPT=6. ! IF (IOPT.EQ.6) THEN DISS(IND1)=0.5*DISS(IND1)+0.5*DISS(IND2)-0.25*XX ENDIF ! ! CENTROID METHOD - IOPT=7. ! IF (IOPT.EQ.7) THEN DISS(IND1)=(MEMBR(I2)*DISS(IND1)+MEMBR(J2)*DISS(IND2)- & MEMBR(I2)*MEMBR(J2)*XX/(MEMBR(I2)+MEMBR(J2)))/ & (MEMBR(I2)+MEMBR(J2)) ENDIF ! IF (I2.GT.K) GOTO 800 IF (DISS(IND1).GE.DMIN) GOTO 800 DMIN=DISS(IND1) JJ=K 800 CONTINUE ENDDO MEMBR(I2)=MEMBR(I2)+MEMBR(J2) DISNN(I2)=DMIN NN(I2)=JJ ! ! Update list of NNs insofar as this is required. ! DO I=1,N-1 IF (.NOT.FLAG(I)) GOTO 900 IF (NN(I).EQ.I2) GOTO 850 IF (NN(I).EQ.J2) GOTO 850 GOTO 900 850 CONTINUE ! (Redetermine NN of I:) DMIN=INF DO J=I+1,N IND=IOFFSET(N,I,J) IF (.NOT.FLAG(J)) GOTO 870 IF (I.EQ.J) GOTO 870 IF (DISS(IND).GE.DMIN) GOTO 870 DMIN=DISS(IND) JJ=J 870 CONTINUE ENDDO NN(I)=JJ DISNN(I)=DMIN 900 CONTINUE ENDDO ! ! Repeat previous steps until N-1 agglomerations carried out. ! IF (NCL.GT.1) GOTO 400 ! ! RETURN END SUBROUTINE HC !----------------------------------------------------------------------------- ! ! integer FUNCTION IOFFSET(N,I,J) ! Map row I and column J of upper half diagonal symmetric matrix ! onto vector. integer :: N,I,J IOFFSET=J+(I-1)*N-(I*(I+1))/2 RETURN END FUNCTION IOFFSET !----------------------------------------------------------------------------- !+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++C ! C ! Given a HIERARCHIC CLUSTERING, described as a sequence of C ! agglomerations, derive the assignments into clusters for the C ! top LEV-1 levels of the hierarchy. C ! Prepare also the required data for representing the C ! dendrogram of this top part of the hierarchy. C ! C ! Parameters: C ! C ! IA, IB, CRIT: vectors of dimension N defining the agglomer- C ! ations. C ! LEV: number of clusters in largest partition. C ! HVALS: vector of dim. LEV, used internally only. C ! ICLASS: array of cluster assignments; dim. N by LEV. C ! IORDER, CRITVAL, HEIGHT: vectors describing the dendrogram, C ! all of dim. LEV. C ! C ! F. Murtagh, ESA/ESO/STECF, Garching, February 1986. C ! C ! HISTORY C ! C ! Bounds bug fix, Oct. 1990, F. Murtagh. C ! Inserted line "IF (LOC.GT.LEV) GOTO 58" on line 48. This was C ! occassioned by incorrect termination of this loop when I C ! reached its (lower) extremity, i.e. N-LEV. Without the C ! /CHECK=(BOUNDS) option on VAX/VMS compilation, this inserted C ! statement was not necessary. C !---------------------------------------------------------------C SUBROUTINE HCASS(N,IA,IB,CRIT,LEV,ICLASS,HVALS,IORDER,& CRITVAL,HEIGHT) ! include 'sizesclu.dat' ! include 'COMMON.IOUNITS' integer :: N,LEV !el integer :: ICLASS(maxconf,maxconf-1) integer :: ICLASS(N,N-1) INTEGER :: IA(N),IB(N),HVALS(LEV),IORDER(LEV),& HEIGHT(LEV) REAL(kind=4) :: CRIT(N),CRITVAL(LEV) integer :: I,J,LOC,LEVEL,ICL,ILEV,NCL,K ! ! Pick out the clusters which the N objects belong to, ! at levels N-2, N-3, ... N-LEV+1 of the hierarchy. ! The clusters are identified by the lowest seq. no. of ! their members. ! There are 2, 3, ... LEV clusters, respectively, for the ! above levels of the hierarchy. ! HVALS(1)=1 HVALS(2)=IB(N-1) LOC=3 DO 59 I=N-2,N-LEV,-1 DO 52 J=1,LOC-1 IF (IA(I).EQ.HVALS(J)) GOTO 54 52 CONTINUE HVALS(LOC)=IA(I) LOC=LOC+1 54 CONTINUE DO 56 J=1,LOC-1 IF (IB(I).EQ.HVALS(J)) GOTO 58 56 CONTINUE IF (LOC.GT.LEV) GOTO 58 HVALS(LOC)=IB(I) LOC=LOC+1 58 CONTINUE 59 CONTINUE ! DO 400 LEVEL=N-LEV,N-2 DO 200 I=1,N ICL=I DO 100 ILEV=1,LEVEL 100 IF (IB(ILEV).EQ.ICL) ICL=IA(ILEV) NCL=N-LEVEL ICLASS(I,NCL-1)=ICL 200 CONTINUE 400 CONTINUE ! DO 120 I=1,N DO 120 J=1,LEV-1 DO 110 K=2,LEV IF (ICLASS(I,J).NE.HVALS(K)) GOTO 110 ICLASS(I,J)=K GOTO 120 110 CONTINUE 120 CONTINUE ! WRITE (iout,450) (j,j=2,LEV) 450 FORMAT(4X,' SEQ NOS',8(i2,'CL'),10000(i3,'CL')) WRITE (iout,470) (' ---',j=2,LEV) 470 FORMAT(4X,' -------',10000a4) DO 500 I=1,N WRITE (iout,600) I,(ICLASS(I,J),J=1,LEV-1) 600 FORMAT(I11,8I4,10000i5) 500 CONTINUE ! ! Determine an ordering of the LEV clusters (at level LEV-1) ! for later representation of the dendrogram. ! These are stored in IORDER. ! Determine the associated ordering of the criterion values ! for the vertical lines in the dendrogram. ! The ordinal values of these criterion values may be used in ! preference, and these are stored in HEIGHT. ! Finally, note that the LEV clusters are renamed so that they ! have seq. nos. 1 to LEV. ! IORDER(1)=IA(N-1) IORDER(2)=IB(N-1) CRITVAL(1)=0.0 CRITVAL(2)=CRIT(N-1) HEIGHT(1)=LEV HEIGHT(2)=LEV-1 LOC=2 DO 700 I=N-2,N-LEV+1,-1 DO 650 J=1,LOC IF (IA(I).EQ.IORDER(J)) THEN ! Shift rightwards and insert IB(I) beside IORDER(J): DO 630 K=LOC+1,J+1,-1 IORDER(K)=IORDER(K-1) CRITVAL(K)=CRITVAL(K-1) HEIGHT(K)=HEIGHT(K-1) 630 CONTINUE IORDER(J+1)=IB(I) CRITVAL(J+1)=CRIT(I) HEIGHT(J+1)=I-(N-LEV) LOC=LOC+1 ENDIF 650 CONTINUE 700 CONTINUE DO 705 I=1,LEV DO 703 J=1,LEV IF (HVALS(I).EQ.IORDER(J)) THEN IORDER(J)=I GOTO 705 ENDIF 703 CONTINUE 705 CONTINUE ! RETURN END SUBROUTINE HCASS !----------------------------------------------------------------------------- !+++++++++++++++++++++++++++++++++++++++++++++++++C ! C ! Construct a DENDROGRAM of the top 8 levels of C ! a HIERARCHIC CLUSTERING. C ! C ! Parameters: C ! C ! IORDER, HEIGHT, CRITVAL: vectors of length LEV C ! defining the dendrogram. C ! These are: the ordering of objects C ! along the bottom of the dendrogram C ! (IORDER); the height of the vertical C ! above each object, in ordinal values C ! (HEIGHT); and in real values (CRITVAL).C ! C ! NOTE: these vectors MUST have been set up with C ! LEV = 9 in the prior call to routine C ! HCASS. ! C ! F. Murtagh, ESA/ESO/STECF, Garching, Feb. 1986.C ! C !-------------------------------------------------C SUBROUTINE HCDEN(LEV,IORDER,HEIGHT,CRITVAL) ! include 'COMMON.IOUNITS' integer :: LEV CHARACTER(len=80) :: LINE INTEGER :: IORDER(LEV),HEIGHT(LEV) REAL(kind=4) :: CRITVAL(LEV) ! INTEGER OUT(3*LEV,3*LEV) ! INTEGER UP,ACROSS,BLANK CHARACTER(len=1) :: OUT(3*LEV,3*LEV) CHARACTER(len=1) :: UP,ACROSS,BLANK DATA UP,ACROSS,BLANK /'|','-',' '/ integer :: I,I2,J,J2,K,I3,L,IC,IDUM ! ! DO I=1,3*LEV DO J=1,3*LEV OUT(I,J)=BLANK ENDDO ENDDO ! ! DO I=3,3*LEV,3 I2=I/3 ! J2=3*LEV+1-3*HEIGHT(I2) DO J=3*LEV,J2,-1 OUT(J,I)=UP ENDDO ! DO K=I,3,-1 I3=INT((K+2)/3) IF ( (3*LEV+1-HEIGHT(I3)*3).LT.J2) GOTO 100 OUT(J2,K)=ACROSS ENDDO 100 CONTINUE ! ENDDO ! ! IC=3 DO I=1,3*LEV IF (I.EQ.IC+1) THEN IDUM=IC/3 IDUM=LEV-IDUM DO L=1,LEV IF (HEIGHT(L).EQ.IDUM) GOTO 190 ENDDO 190 IDUM=L WRITE(iout,200) CRITVAL(IDUM),(OUT(I,J),J=1,3*LEV) IC=IC+3 ELSE LINE = ' ' WRITE(iout,210) (OUT(I,J),J=1,3*LEV) ENDIF 200 FORMAT(1H ,8X,F12.2,4X,27000A1) 210 FORMAT(1H ,24X,27000A1) ENDDO WRITE(iout,250) WRITE(iout,220)(IORDER(J),J=1,LEV) WRITE(iout,250) 220 FORMAT(1H ,24X,9000I3) WRITE(iout,230) LEV 230 FORMAT(1H ,13X,'CRITERION CLUSTERS 1 TO ',i3) WRITE(iout,240) LEV-1 240 FORMAT(1H ,13X,'VALUES. (TOP ',i3,' LEVELS OF HIERARCHY).') 250 FORMAT(/) ! ! RETURN END SUBROUTINE HCDEN !----------------------------------------------------------------------------- end module hc_ !----------------------------------------------------------------------------- !-----------------------------------------------------------------------------