1 !*********************** Contents ****************************************
2 !* Sample driver program, VAX-11 Fortran; **********************************
3 !* HC: O(n^2) time, O(n^2) space hierarchical clustering, Fortran 77 *******
4 !* HCASS: determine cluster-memberships, Fortran 77. ***********************
5 !* HCDEN: draw upper part of dendrogram, VAX-11 Fortran. *******************
6 !* Sample data set: last 36 lines. *****************************************
7 !***************************************************************************
8 ! REAL DATA(18,16),CRIT(18),MEMBR(18)
10 ! INTEGER IA(18),IB(18)
11 ! INTEGER ICLASS(18,9),HVALS(9)
12 ! INTEGER IORDER(9),HEIGHT(9)
13 ! DIMENSION NN(18),DISNN(18)
16 ! IN ABOVE, 18=N, 16=M, 9=LEV, 153=N(N-1)/2.
19 ! OPEN(UNIT=21,STATUS='OLD',FILE='SPECTR.DAT')
25 ! READ(21,100)(DATA(I,J),J=1,M)
32 ! CALL HC(N,M,LEN,IOPT,DATA,IA,IB,CRIT,MEMBR,NN,DISNN,FLAG,D)
36 ! CALL HCASS(N,IA,IB,CRIT,LEV,ICLASS,HVALS,IORDER,CRITVAL,HEIGHT)
39 ! CALL HCDEN(LEV,IORDER,HEIGHT,CRITVAL)
43 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++C
45 ! HIERARCHICAL CLUSTERING using (user-specified) criterion. C
49 !removed DATA(N,M) input data matrix, C
50 ! DISS(LEN) dissimilarities in lower half diagonal C
51 ! storage; LEN = N.N-1/2, C
52 ! IOPT clustering criterion to be used, C
53 ! IA, IB, CRIT history of agglomerations; dimensions C
54 ! N, first N-1 locations only used, C
55 ! MEMBR, NN, DISNN vectors of length N, used to store C
56 ! cluster cardinalities, current nearest C
57 ! neighbour, and the dissimilarity assoc. C
59 ! FLAG boolean indicator of agglomerable obj./ C
62 ! F. Murtagh, ESA/ESO/STECF, Garching, February 1986. C
64 !------------------------------------------------------------C
66 !-----------------------------------------------------------------------------
71 !-----------------------------------------------------------------------------
74 !-----------------------------------------------------------------------------
76 !-----------------------------------------------------------------------------
78 !-----------------------------------------------------------------------------
80 SUBROUTINE HC(N,M,LEN,IOPT,IA,IB,CRIT,MEMBR,NN,DISNN,&
82 integer :: N,M,LEN,IOPT
83 REAL(kind=4) :: MEMBR(N)
84 REAL(kind=4) :: DISS(LEN)
85 INTEGER :: IA(N),IB(N)
86 REAL(kind=4) :: CRIT(N)
87 integer,DIMENSION(N) :: NN
88 real(kind=4),dimension(N) ::DISNN
92 integer :: I,J,NCL,IND,IM,JM,I2,J2,K,IND1,IND2,IND3,JJ
93 real(kind=8) :: DMIN,X,XX
103 ! Construct dissimilarity matrix
110 !input DISS(IND)=DISS(IND)+(DATA(I,K)-DATA(J,K))**2
112 IF (IOPT.EQ.1) DISS(IND)=DISS(IND)/2.
113 ! (Above is done for the case of the min. var. method
114 ! where merging criteria are defined in terms of variances
115 ! rather than distances.)
119 ! Carry out an agglomeration - first create list of NNs
125 IF (DISS(IND).GE.DMIN) GOTO 500
135 ! Next, determine least diss. using list of NNs
138 IF (.NOT.FLAG(I)) GOTO 600
139 IF (DISNN(I).GE.DMIN) GOTO 600
147 ! This allows an agglomeration to be carried out.
155 ! Update dissimilarities from new cluster.
160 IF (.NOT.FLAG(K)) GOTO 800
161 IF (K.EQ.I2) GOTO 800
162 X=MEMBR(I2)+MEMBR(J2)+MEMBR(K)
173 IND3=IOFFSET(N,I2,J2)
176 ! WARD'S MINIMUM VARIANCE METHOD - IOPT=1.
179 DISS(IND1)=(MEMBR(I2)+MEMBR(K))*DISS(IND1)+ &
180 (MEMBR(J2)+MEMBR(K))*DISS(IND2)- &
182 DISS(IND1)=DISS(IND1)/X
185 ! SINGLE LINK METHOD - IOPT=2.
188 DISS(IND1)=MIN(DISS(IND1),DISS(IND2))
191 ! COMPLETE LINK METHOD - IOPT=3.
194 DISS(IND1)=MAX(DISS(IND1),DISS(IND2))
197 ! AVERAGE LINK (OR GROUP AVERAGE) METHOD - IOPT=4.
200 DISS(IND1)=(MEMBR(I2)*DISS(IND1)+MEMBR(J2)*DISS(IND2))/ &
201 (MEMBR(I2)+MEMBR(J2))
204 ! MCQUITTY'S METHOD - IOPT=5.
207 DISS(IND1)=0.5*DISS(IND1)+0.5*DISS(IND2)
210 ! MEDIAN (GOWER'S) METHOD - IOPT=6.
213 DISS(IND1)=0.5*DISS(IND1)+0.5*DISS(IND2)-0.25*XX
216 ! CENTROID METHOD - IOPT=7.
219 DISS(IND1)=(MEMBR(I2)*DISS(IND1)+MEMBR(J2)*DISS(IND2)- &
220 MEMBR(I2)*MEMBR(J2)*XX/(MEMBR(I2)+MEMBR(J2)))/ &
221 (MEMBR(I2)+MEMBR(J2))
224 IF (I2.GT.K) GOTO 800
225 IF (DISS(IND1).GE.DMIN) GOTO 800
230 MEMBR(I2)=MEMBR(I2)+MEMBR(J2)
234 ! Update list of NNs insofar as this is required.
237 IF (.NOT.FLAG(I)) GOTO 900
238 IF (NN(I).EQ.I2) GOTO 850
239 IF (NN(I).EQ.J2) GOTO 850
242 ! (Redetermine NN of I:)
246 IF (.NOT.FLAG(J)) GOTO 870
248 IF (DISS(IND).GE.DMIN) GOTO 870
258 ! Repeat previous steps until N-1 agglomerations carried out.
260 IF (NCL.GT.1) GOTO 400
265 !-----------------------------------------------------------------------------
268 integer FUNCTION IOFFSET(N,I,J)
269 ! Map row I and column J of upper half diagonal symmetric matrix
272 IOFFSET=J+(I-1)*N-(I*(I+1))/2
275 !-----------------------------------------------------------------------------
276 !+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++C
278 ! Given a HIERARCHIC CLUSTERING, described as a sequence of C
279 ! agglomerations, derive the assignments into clusters for the C
280 ! top LEV-1 levels of the hierarchy. C
281 ! Prepare also the required data for representing the C
282 ! dendrogram of this top part of the hierarchy. C
286 ! IA, IB, CRIT: vectors of dimension N defining the agglomer- C
288 ! LEV: number of clusters in largest partition. C
289 ! HVALS: vector of dim. LEV, used internally only. C
290 ! ICLASS: array of cluster assignments; dim. N by LEV. C
291 ! IORDER, CRITVAL, HEIGHT: vectors describing the dendrogram, C
294 ! F. Murtagh, ESA/ESO/STECF, Garching, February 1986. C
298 ! Bounds bug fix, Oct. 1990, F. Murtagh. C
299 ! Inserted line "IF (LOC.GT.LEV) GOTO 58" on line 48. This was C
300 ! occassioned by incorrect termination of this loop when I C
301 ! reached its (lower) extremity, i.e. N-LEV. Without the C
302 ! /CHECK=(BOUNDS) option on VAX/VMS compilation, this inserted C
303 ! statement was not necessary. C
304 !---------------------------------------------------------------C
305 SUBROUTINE HCASS(N,IA,IB,CRIT,LEV,ICLASS,HVALS,IORDER,&
307 ! include 'sizesclu.dat'
308 ! include 'COMMON.IOUNITS'
310 !el integer :: ICLASS(maxconf,maxconf-1)
311 integer :: ICLASS(N,N-1)
312 INTEGER :: IA(N),IB(N),HVALS(LEV),IORDER(LEV),&
314 REAL(kind=4) :: CRIT(N),CRITVAL(LEV)
315 integer :: I,J,LOC,LEVEL,ICL,ILEV,NCL,K
317 ! Pick out the clusters which the N objects belong to,
318 ! at levels N-2, N-3, ... N-LEV+1 of the hierarchy.
319 ! The clusters are identified by the lowest seq. no. of
321 ! There are 2, 3, ... LEV clusters, respectively, for the
322 ! above levels of the hierarchy.
329 IF (IA(I).EQ.HVALS(J)) GOTO 54
335 IF (IB(I).EQ.HVALS(J)) GOTO 58
337 IF (LOC.GT.LEV) GOTO 58
343 DO 400 LEVEL=N-LEV,N-2
347 100 IF (IB(ILEV).EQ.ICL) ICL=IA(ILEV)
356 IF (ICLASS(I,J).NE.HVALS(K)) GOTO 110
362 WRITE (iout,450) (j,j=2,LEV)
363 450 FORMAT(4X,' SEQ NOS',8(i2,'CL'),10000(i3,'CL'))
364 WRITE (iout,470) (' ---',j=2,LEV)
365 470 FORMAT(4X,' -------',10000a4)
367 WRITE (iout,600) I,(ICLASS(I,J),J=1,LEV-1)
368 600 FORMAT(I11,8I4,10000i5)
371 ! Determine an ordering of the LEV clusters (at level LEV-1)
372 ! for later representation of the dendrogram.
373 ! These are stored in IORDER.
374 ! Determine the associated ordering of the criterion values
375 ! for the vertical lines in the dendrogram.
376 ! The ordinal values of these criterion values may be used in
377 ! preference, and these are stored in HEIGHT.
378 ! Finally, note that the LEV clusters are renamed so that they
379 ! have seq. nos. 1 to LEV.
388 DO 700 I=N-2,N-LEV+1,-1
390 IF (IA(I).EQ.IORDER(J)) THEN
391 ! Shift rightwards and insert IB(I) beside IORDER(J):
392 DO 630 K=LOC+1,J+1,-1
393 IORDER(K)=IORDER(K-1)
394 CRITVAL(K)=CRITVAL(K-1)
395 HEIGHT(K)=HEIGHT(K-1)
399 HEIGHT(J+1)=I-(N-LEV)
406 IF (HVALS(I).EQ.IORDER(J)) THEN
415 !-----------------------------------------------------------------------------
416 !+++++++++++++++++++++++++++++++++++++++++++++++++C
418 ! Construct a DENDROGRAM of the top 8 levels of C
419 ! a HIERARCHIC CLUSTERING. C
423 ! IORDER, HEIGHT, CRITVAL: vectors of length LEV C
424 ! defining the dendrogram. C
425 ! These are: the ordering of objects C
426 ! along the bottom of the dendrogram C
427 ! (IORDER); the height of the vertical C
428 ! above each object, in ordinal values C
429 ! (HEIGHT); and in real values (CRITVAL).C
431 ! NOTE: these vectors MUST have been set up with C
432 ! LEV = 9 in the prior call to routine C
435 ! F. Murtagh, ESA/ESO/STECF, Garching, Feb. 1986.C
437 !-------------------------------------------------C
438 SUBROUTINE HCDEN(LEV,IORDER,HEIGHT,CRITVAL)
439 ! include 'COMMON.IOUNITS'
441 CHARACTER(len=80) :: LINE
442 INTEGER :: IORDER(LEV),HEIGHT(LEV)
443 REAL(kind=4) :: CRITVAL(LEV)
444 ! INTEGER OUT(3*LEV,3*LEV)
445 ! INTEGER UP,ACROSS,BLANK
446 CHARACTER(len=1) :: OUT(3*LEV,3*LEV)
447 CHARACTER(len=1) :: UP,ACROSS,BLANK
448 DATA UP,ACROSS,BLANK /'|','-',' '/
449 integer :: I,I2,J,J2,K,I3,L,IC,IDUM
462 J2=3*LEV+1-3*HEIGHT(I2)
469 IF ( (3*LEV+1-HEIGHT(I3)*3).LT.J2) GOTO 100
483 IF (HEIGHT(L).EQ.IDUM) GOTO 190
486 WRITE(iout,200) CRITVAL(IDUM),(OUT(I,J),J=1,3*LEV)
490 WRITE(iout,210) (OUT(I,J),J=1,3*LEV)
492 200 FORMAT(1H ,8X,F12.2,4X,27000A1)
493 210 FORMAT(1H ,24X,27000A1)
496 WRITE(iout,220)(IORDER(J),J=1,LEV)
498 220 FORMAT(1H ,24X,9000I3)
500 230 FORMAT(1H ,13X,'CRITERION CLUSTERS 1 TO ',i3)
501 WRITE(iout,240) LEV-1
502 240 FORMAT(1H ,13X,'VALUES. (TOP ',i3,' LEVELS OF HIERARCHY).')
508 !-----------------------------------------------------------------------------
510 !-----------------------------------------------------------------------------
511 !-----------------------------------------------------------------------------