1 C*********************** Contents ****************************************
2 C* Sample driver program, VAX-11 Fortran; **********************************
3 C* HC: O(n^2) time, O(n^2) space hierarchical clustering, Fortran 77 *******
4 C* HCASS: determine cluster-memberships, Fortran 77. ***********************
5 C* HCDEN: draw upper part of dendrogram, VAX-11 Fortran. *******************
6 C* Sample data set: last 36 lines. *****************************************
7 C***************************************************************************
8 C REAL DATA(18,16),CRIT(18),MEMBR(18)
10 C INTEGER IA(18),IB(18)
11 C INTEGER ICLASS(18,9),HVALS(9)
12 C INTEGER IORDER(9),HEIGHT(9)
13 C DIMENSION NN(18),DISNN(18)
16 C IN ABOVE, 18=N, 16=M, 9=LEV, 153=N(N-1)/2.
19 C OPEN(UNIT=21,STATUS='OLD',FILE='SPECTR.DAT')
25 C READ(21,100)(DATA(I,J),J=1,M)
32 C CALL HC(N,M,LEN,IOPT,DATA,IA,IB,CRIT,MEMBR,NN,DISNN,FLAG,D)
36 C CALL HCASS(N,IA,IB,CRIT,LEV,ICLASS,HVALS,IORDER,CRITVAL,HEIGHT)
39 C CALL HCDEN(LEV,IORDER,HEIGHT,CRITVAL)
43 C++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++C
45 C HIERARCHICAL CLUSTERING using (user-specified) criterion. C
49 Cremoved DATA(N,M) input data matrix, C
50 C DISS(LEN) dissimilarities in lower half diagonal C
51 C storage; LEN = N.N-1/2, C
52 C IOPT clustering criterion to be used, C
53 C IA, IB, CRIT history of agglomerations; dimensions C
54 C N, first N-1 locations only used, C
55 C MEMBR, NN, DISNN vectors of length N, used to store C
56 C cluster cardinalities, current nearest C
57 C neighbour, and the dissimilarity assoc. C
59 C FLAG boolean indicator of agglomerable obj./ C
62 C F. Murtagh, ESA/ESO/STECF, Garching, February 1986. C
64 C------------------------------------------------------------C
65 SUBROUTINE HC(N,M,LEN,IOPT,IA,IB,CRIT,MEMBR,NN,DISNN,
71 DIMENSION NN(N),DISNN(N)
84 C Construct dissimilarity matrix
91 cinput DISS(IND)=DISS(IND)+(DATA(I,K)-DATA(J,K))**2
93 IF (IOPT.EQ.1) DISS(IND)=DISS(IND)/2.
94 C (Above is done for the case of the min. var. method
95 C where merging criteria are defined in terms of variances
96 C rather than distances.)
100 C Carry out an agglomeration - first create list of NNs
106 IF (DISS(IND).GE.DMIN) GOTO 500
116 C Next, determine least diss. using list of NNs
119 IF (.NOT.FLAG(I)) GOTO 600
120 IF (DISNN(I).GE.DMIN) GOTO 600
128 C This allows an agglomeration to be carried out.
136 C Update dissimilarities from new cluster.
141 IF (.NOT.FLAG(K)) GOTO 800
142 IF (K.EQ.I2) GOTO 800
143 X=MEMBR(I2)+MEMBR(J2)+MEMBR(K)
154 IND3=IOFFSET(N,I2,J2)
157 C WARD'S MINIMUM VARIANCE METHOD - IOPT=1.
160 DISS(IND1)=(MEMBR(I2)+MEMBR(K))*DISS(IND1)+
161 X (MEMBR(J2)+MEMBR(K))*DISS(IND2)-
163 DISS(IND1)=DISS(IND1)/X
166 C SINGLE LINK METHOD - IOPT=2.
169 DISS(IND1)=MIN(DISS(IND1),DISS(IND2))
172 C COMPLETE LINK METHOD - IOPT=3.
175 DISS(IND1)=MAX(DISS(IND1),DISS(IND2))
178 C AVERAGE LINK (OR GROUP AVERAGE) METHOD - IOPT=4.
181 DISS(IND1)=(MEMBR(I2)*DISS(IND1)+MEMBR(J2)*DISS(IND2))/
182 X (MEMBR(I2)+MEMBR(J2))
185 C MCQUITTY'S METHOD - IOPT=5.
188 DISS(IND1)=0.5*DISS(IND1)+0.5*DISS(IND2)
191 C MEDIAN (GOWER'S) METHOD - IOPT=6.
194 DISS(IND1)=0.5*DISS(IND1)+0.5*DISS(IND2)-0.25*XX
197 C CENTROID METHOD - IOPT=7.
200 DISS(IND1)=(MEMBR(I2)*DISS(IND1)+MEMBR(J2)*DISS(IND2)-
201 X MEMBR(I2)*MEMBR(J2)*XX/(MEMBR(I2)+MEMBR(J2)))/
202 X (MEMBR(I2)+MEMBR(J2))
205 IF (I2.GT.K) GOTO 800
206 IF (DISS(IND1).GE.DMIN) GOTO 800
211 MEMBR(I2)=MEMBR(I2)+MEMBR(J2)
215 C Update list of NNs insofar as this is required.
218 IF (.NOT.FLAG(I)) GOTO 900
219 IF (NN(I).EQ.I2) GOTO 850
220 IF (NN(I).EQ.J2) GOTO 850
223 C (Redetermine NN of I:)
227 IF (.NOT.FLAG(J)) GOTO 870
229 IF (DISS(IND).GE.DMIN) GOTO 870
239 C Repeat previous steps until N-1 agglomerations carried out.
241 IF (NCL.GT.1) GOTO 400
248 FUNCTION IOFFSET(N,I,J)
249 C Map row I and column J of upper half diagonal symmetric matrix
251 IOFFSET=J+(I-1)*N-(I*(I+1))/2
254 C+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++C
256 C Given a HIERARCHIC CLUSTERING, described as a sequence of C
257 C agglomerations, derive the assignments into clusters for the C
258 C top LEV-1 levels of the hierarchy. C
259 C Prepare also the required data for representing the C
260 C dendrogram of this top part of the hierarchy. C
264 C IA, IB, CRIT: vectors of dimension N defining the agglomer- C
266 C LEV: number of clusters in largest partition. C
267 C HVALS: vector of dim. LEV, used internally only. C
268 C ICLASS: array of cluster assignments; dim. N by LEV. C
269 C IORDER, CRITVAL, HEIGHT: vectors describing the dendrogram, C
272 C F. Murtagh, ESA/ESO/STECF, Garching, February 1986. C
276 C Bounds bug fix, Oct. 1990, F. Murtagh. C
277 C Inserted line "IF (LOC.GT.LEV) GOTO 58" on line 48. This was C
278 C occassioned by incorrect termination of this loop when I C
279 C reached its (lower) extremity, i.e. N-LEV. Without the C
280 C /CHECK=(BOUNDS) option on VAX/VMS compilation, this inserted C
281 C statement was not necessary. C
282 C---------------------------------------------------------------C
283 SUBROUTINE HCASS(N,IA,IB,CRIT,LEV,ICLASS,HVALS,IORDER,
286 include 'sizesclu.dat'
287 include 'COMMON.IOUNITS'
288 integer ICLASS(maxconf,maxconf-1)
289 INTEGER IA(N),IB(N),HVALS(LEV),IORDER(LEV),
291 REAL CRIT(N),CRITVAL(LEV)
293 C Pick out the clusters which the N objects belong to,
294 C at levels N-2, N-3, ... N-LEV+1 of the hierarchy.
295 C The clusters are identified by the lowest seq. no. of
297 C There are 2, 3, ... LEV clusters, respectively, for the
298 C above levels of the hierarchy.
305 IF (IA(I).EQ.HVALS(J)) GOTO 54
311 IF (IB(I).EQ.HVALS(J)) GOTO 58
313 IF (LOC.GT.LEV) GOTO 58
319 DO 400 LEVEL=N-LEV,N-2
323 100 IF (IB(ILEV).EQ.ICL) ICL=IA(ILEV)
332 IF (ICLASS(I,J).NE.HVALS(K)) GOTO 110
338 WRITE (iout,450) (j,j=2,LEV)
339 450 FORMAT(4X,' SEQ NOS',8(i2,'CL'),10000(i3,'CL'))
340 WRITE (iout,470) (' ---',j=2,LEV)
341 470 FORMAT(4X,' -------',10000a4)
343 WRITE (iout,600) I,(ICLASS(I,J),J=1,LEV-1)
344 600 FORMAT(I11,8I4,10000i5)
347 C Determine an ordering of the LEV clusters (at level LEV-1)
348 C for later representation of the dendrogram.
349 C These are stored in IORDER.
350 C Determine the associated ordering of the criterion values
351 C for the vertical lines in the dendrogram.
352 C The ordinal values of these criterion values may be used in
353 C preference, and these are stored in HEIGHT.
354 C Finally, note that the LEV clusters are renamed so that they
355 C have seq. nos. 1 to LEV.
364 DO 700 I=N-2,N-LEV+1,-1
366 IF (IA(I).EQ.IORDER(J)) THEN
367 C Shift rightwards and insert IB(I) beside IORDER(J):
368 DO 630 K=LOC+1,J+1,-1
369 IORDER(K)=IORDER(K-1)
370 CRITVAL(K)=CRITVAL(K-1)
371 HEIGHT(K)=HEIGHT(K-1)
375 HEIGHT(J+1)=I-(N-LEV)
382 IF (HVALS(I).EQ.IORDER(J)) THEN
391 C+++++++++++++++++++++++++++++++++++++++++++++++++C
393 C Construct a DENDROGRAM of the top 8 levels of C
394 C a HIERARCHIC CLUSTERING. C
398 C IORDER, HEIGHT, CRITVAL: vectors of length LEV C
399 C defining the dendrogram. C
400 C These are: the ordering of objects C
401 C along the bottom of the dendrogram C
402 C (IORDER); the height of the vertical C
403 C above each object, in ordinal values C
404 C (HEIGHT); and in real values (CRITVAL).C
406 C NOTE: these vectors MUST have been set up with C
407 C LEV = 9 in the prior call to routine C
410 C F. Murtagh, ESA/ESO/STECF, Garching, Feb. 1986.C
412 C-------------------------------------------------C
413 SUBROUTINE HCDEN(LEV,IORDER,HEIGHT,CRITVAL)
415 include 'COMMON.IOUNITS'
417 INTEGER IORDER(LEV),HEIGHT(LEV)
419 C INTEGER OUT(3*LEV,3*LEV)
420 CHARACTER*1 OUT(3*LEV,3*LEV)
421 c INTEGER UP,ACROSS,BLANK
422 CHARACTER*1 UP,ACROSS,BLANK
423 DATA UP,ACROSS,BLANK/'|','-',' '/
436 J2=3*LEV+1-3*HEIGHT(I2)
443 IF ( (3*LEV+1-HEIGHT(I3)*3).LT.J2) GOTO 100
457 IF (HEIGHT(L).EQ.IDUM) GOTO 190
460 WRITE(iout,200) CRITVAL(IDUM),(OUT(I,J),J=1,3*LEV)
464 WRITE(iout,210) (OUT(I,J),J=1,3*LEV)
466 200 FORMAT(1H ,8X,F12.2,4X,27000A1)
467 210 FORMAT(1H ,24X,27000A1)
470 WRITE(iout,220)(IORDER(J),J=1,LEV)
472 220 FORMAT(1H ,24X,9000I3)
474 230 FORMAT(1H ,13X,'CRITERION CLUSTERS 1 TO ',i3)
475 WRITE(iout,240) LEV-1
476 240 FORMAT(1H ,13X,'VALUES. (TOP ',i3,' LEVELS OF HIERARCHY).')