unres_package_Oct_2016 from emilial
[unres4.git] / source / cluster / hc.f90
diff --git a/source/cluster/hc.f90 b/source/cluster/hc.f90
new file mode 100644 (file)
index 0000000..113e71c
--- /dev/null
@@ -0,0 +1,511 @@
+!***********************  Contents  ****************************************
+!* Sample driver program, VAX-11 Fortran; **********************************
+!* HC: O(n^2) time, O(n^2) space hierarchical clustering, Fortran 77 *******
+!* HCASS: determine cluster-memberships, Fortran 77. *********************** 
+!* HCDEN: draw upper part of dendrogram, VAX-11 Fortran. *******************
+!* Sample data set: last 36 lines. *****************************************
+!***************************************************************************
+!      REAL DATA(18,16),CRIT(18),MEMBR(18)
+!      REAL CRITVAL(9)
+!      INTEGER IA(18),IB(18)
+!      INTEGER ICLASS(18,9),HVALS(9)
+!      INTEGER IORDER(9),HEIGHT(9)
+!      DIMENSION NN(18),DISNN(18)
+!      REAL D(153)
+!      LOGICAL FLAG(18)
+! IN ABOVE, 18=N, 16=M, 9=LEV, 153=N(N-1)/2.
+!
+!
+!      OPEN(UNIT=21,STATUS='OLD',FILE='SPECTR.DAT')
+!
+!
+!      N = 18
+!      M = 16
+!      DO I=1,N
+!        READ(21,100)(DATA(I,J),J=1,M)        
+!      ENDDO
+! 100  FORMAT(8F7.1)
+!
+!
+!      LEN = (N*(N-1))/2
+!      IOPT=1
+!      CALL HC(N,M,LEN,IOPT,DATA,IA,IB,CRIT,MEMBR,NN,DISNN,FLAG,D)
+!
+!
+!      LEV = 9
+!      CALL HCASS(N,IA,IB,CRIT,LEV,ICLASS,HVALS,IORDER,CRITVAL,HEIGHT)
+!
+!
+!      CALL HCDEN(LEV,IORDER,HEIGHT,CRITVAL)
+!
+!
+!      END
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++C
+!                                                            C
+!  HIERARCHICAL CLUSTERING using (user-specified) criterion. C
+!                                                            C
+!  Parameters:                                               C
+!                                                            C
+!removed  DATA(N,M)         input data matrix,               C
+!  DISS(LEN)         dissimilarities in lower half diagonal  C
+!                    storage; LEN = N.N-1/2,                 C
+!  IOPT              clustering criterion to be used,        C
+!  IA, IB, CRIT      history of agglomerations; dimensions   C
+!                    N, first N-1 locations only used,       C
+!  MEMBR, NN, DISNN  vectors of length N, used to store      C 
+!                    cluster cardinalities, current nearest  C
+!                    neighbour, and the dissimilarity assoc. C
+!                    with the latter.                        C
+!  FLAG              boolean indicator of agglomerable obj./ C
+!                    clusters.                               C
+!                                                            C
+!  F. Murtagh, ESA/ESO/STECF, Garching, February 1986.       C
+!                                                            C
+!------------------------------------------------------------C
+      module hc_
+!-----------------------------------------------------------------------------
+      use io_units
+      use names
+      use clust_data
+      implicit none
+!-----------------------------------------------------------------------------
+!
+!
+!-----------------------------------------------------------------------------
+      contains
+!-----------------------------------------------------------------------------
+! hc.f
+!-----------------------------------------------------------------------------
+
+      SUBROUTINE HC(N,M,LEN,IOPT,IA,IB,CRIT,MEMBR,NN,DISNN,&
+                     FLAG,DISS)
+      integer :: N,M,LEN,IOPT
+      REAL(kind=4) :: MEMBR(N)
+      REAL(kind=4) :: DISS(LEN)
+      INTEGER :: IA(N),IB(N)
+      REAL(kind=4) :: CRIT(N)
+      integer,DIMENSION(N) :: NN
+      real(kind=4),dimension(N) ::DISNN 
+      LOGICAL :: FLAG(N)
+      REAL(kind=4) INF
+      DATA INF /1.E+20/
+      integer :: I,J,NCL,IND,IM,JM,I2,J2,K,IND1,IND2,IND3,JJ
+      real(kind=8) :: DMIN,X,XX
+!
+!  Initializations
+!
+      DO I=1,N
+         MEMBR(I)=1.
+         FLAG(I)=.TRUE.
+      ENDDO
+      NCL=N
+!
+!  Construct dissimilarity matrix
+!
+      DO I=1,N-1
+         DO J=I+1,N
+            IND=IOFFSET(N,I,J)
+!input            DISS(IND)=0.
+!input            DO K=1,M
+!input               DISS(IND)=DISS(IND)+(DATA(I,K)-DATA(J,K))**2
+!input            ENDDO
+            IF (IOPT.EQ.1) DISS(IND)=DISS(IND)/2.
+!           (Above is done for the case of the min. var. method
+!            where merging criteria are defined in terms of variances
+!            rather than distances.)
+          ENDDO
+       ENDDO
+!
+!  Carry out an agglomeration - first create list of NNs
+!
+      DO I=1,N-1
+         DMIN=INF
+         DO J=I+1,N
+            IND=IOFFSET(N,I,J)
+            IF (DISS(IND).GE.DMIN) GOTO 500
+               DMIN=DISS(IND)
+               JM=J
+  500    CONTINUE
+         ENDDO
+         NN(I)=JM
+         DISNN(I)=DMIN
+      ENDDO
+!
+  400 CONTINUE
+!     Next, determine least diss. using list of NNs
+      DMIN=INF
+      DO I=1,N-1
+         IF (.NOT.FLAG(I)) GOTO 600
+         IF (DISNN(I).GE.DMIN) GOTO 600
+            DMIN=DISNN(I)
+            IM=I
+            JM=NN(I)
+  600    CONTINUE
+      ENDDO
+      NCL=NCL-1
+!
+!  This allows an agglomeration to be carried out.
+!
+      I2=MIN0(IM,JM)
+      J2=MAX0(IM,JM)
+      IA(N-NCL)=I2
+      IB(N-NCL)=J2
+      CRIT(N-NCL)=DMIN
+!
+!  Update dissimilarities from new cluster.
+!
+      FLAG(J2)=.FALSE.
+      DMIN=INF
+      DO K=1,N
+         IF (.NOT.FLAG(K)) GOTO 800
+         IF (K.EQ.I2) GOTO 800
+         X=MEMBR(I2)+MEMBR(J2)+MEMBR(K)
+         IF (I2.LT.K) THEN
+                           IND1=IOFFSET(N,I2,K)
+                      ELSE
+                           IND1=IOFFSET(N,K,I2)
+         ENDIF
+         IF (J2.LT.K) THEN
+                           IND2=IOFFSET(N,J2,K)
+                      ELSE
+                           IND2=IOFFSET(N,K,J2)
+         ENDIF
+         IND3=IOFFSET(N,I2,J2)
+         XX=DISS(IND3)
+!
+!  WARD'S MINIMUM VARIANCE METHOD - IOPT=1.
+!
+         IF (IOPT.EQ.1) THEN
+            DISS(IND1)=(MEMBR(I2)+MEMBR(K))*DISS(IND1)+ &
+                       (MEMBR(J2)+MEMBR(K))*DISS(IND2)- &
+                       MEMBR(K)*XX
+            DISS(IND1)=DISS(IND1)/X
+         ENDIF
+!
+!  SINGLE LINK METHOD - IOPT=2.
+!
+         IF (IOPT.EQ.2) THEN
+            DISS(IND1)=MIN(DISS(IND1),DISS(IND2))
+         ENDIF
+!
+!  COMPLETE LINK METHOD - IOPT=3.
+!
+         IF (IOPT.EQ.3) THEN
+            DISS(IND1)=MAX(DISS(IND1),DISS(IND2))
+         ENDIF
+!
+!  AVERAGE LINK (OR GROUP AVERAGE) METHOD - IOPT=4.
+!
+         IF (IOPT.EQ.4) THEN
+            DISS(IND1)=(MEMBR(I2)*DISS(IND1)+MEMBR(J2)*DISS(IND2))/ &
+                       (MEMBR(I2)+MEMBR(J2))
+         ENDIF
+!
+!  MCQUITTY'S METHOD - IOPT=5.
+!
+         IF (IOPT.EQ.5) THEN
+            DISS(IND1)=0.5*DISS(IND1)+0.5*DISS(IND2)
+         ENDIF
+!
+!  MEDIAN (GOWER'S) METHOD - IOPT=6.
+!
+         IF (IOPT.EQ.6) THEN
+            DISS(IND1)=0.5*DISS(IND1)+0.5*DISS(IND2)-0.25*XX
+         ENDIF
+!
+!  CENTROID METHOD - IOPT=7.
+!
+         IF (IOPT.EQ.7) THEN
+            DISS(IND1)=(MEMBR(I2)*DISS(IND1)+MEMBR(J2)*DISS(IND2)- &
+                MEMBR(I2)*MEMBR(J2)*XX/(MEMBR(I2)+MEMBR(J2)))/ &
+                (MEMBR(I2)+MEMBR(J2))
+            ENDIF
+!
+         IF (I2.GT.K) GOTO 800
+         IF (DISS(IND1).GE.DMIN) GOTO 800
+            DMIN=DISS(IND1)
+            JJ=K
+  800    CONTINUE
+      ENDDO
+      MEMBR(I2)=MEMBR(I2)+MEMBR(J2)
+      DISNN(I2)=DMIN
+      NN(I2)=JJ
+!
+!  Update list of NNs insofar as this is required.
+!
+      DO I=1,N-1
+         IF (.NOT.FLAG(I)) GOTO 900
+         IF (NN(I).EQ.I2) GOTO 850
+         IF (NN(I).EQ.J2) GOTO 850
+         GOTO 900
+  850    CONTINUE
+!        (Redetermine NN of I:)
+         DMIN=INF
+         DO J=I+1,N
+            IND=IOFFSET(N,I,J)
+            IF (.NOT.FLAG(J)) GOTO 870
+            IF (I.EQ.J) GOTO 870
+            IF (DISS(IND).GE.DMIN) GOTO 870
+               DMIN=DISS(IND)
+               JJ=J
+  870       CONTINUE
+         ENDDO
+         NN(I)=JJ
+         DISNN(I)=DMIN
+  900    CONTINUE
+      ENDDO
+!
+!  Repeat previous steps until N-1 agglomerations carried out.
+!
+      IF (NCL.GT.1) GOTO 400
+!
+!
+      RETURN
+      END SUBROUTINE HC
+!-----------------------------------------------------------------------------
+!
+!
+      integer FUNCTION IOFFSET(N,I,J)
+!  Map row I and column J of upper half diagonal symmetric matrix 
+!  onto vector.
+      integer :: N,I,J
+      IOFFSET=J+(I-1)*N-(I*(I+1))/2
+      RETURN
+      END FUNCTION IOFFSET
+!-----------------------------------------------------------------------------
+!+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++C
+!                                                               C
+!  Given a HIERARCHIC CLUSTERING, described as a sequence of    C
+!  agglomerations, derive the assignments into clusters for the C
+!  top LEV-1 levels of the hierarchy.                           C
+!  Prepare also the required data for representing the          C
+!  dendrogram of this top part of the hierarchy.                C
+!                                                               C
+!  Parameters:                                                  C
+!                                                               C
+!  IA, IB, CRIT: vectors of dimension N defining the agglomer-  C
+!                 ations.                                       C
+!  LEV:          number of clusters in largest partition.       C
+!  HVALS:        vector of dim. LEV, used internally only.      C
+!  ICLASS:       array of cluster assignments; dim. N by LEV.   C
+!  IORDER, CRITVAL, HEIGHT: vectors describing the dendrogram,  C
+!                all of dim. LEV.                               C
+!                                                               C
+!  F. Murtagh, ESA/ESO/STECF, Garching, February 1986.          C
+!                                                               C
+! HISTORY                                                       C
+!                                                               C
+! Bounds bug fix, Oct. 1990, F. Murtagh.                        C
+! Inserted line "IF (LOC.GT.LEV) GOTO 58" on line 48.  This was C
+! occassioned by incorrect termination of this loop when I      C
+! reached its (lower) extremity, i.e. N-LEV.  Without the       C
+! /CHECK=(BOUNDS) option on VAX/VMS compilation, this inserted  C
+! statement was not necessary.                                  C
+!---------------------------------------------------------------C
+      SUBROUTINE HCASS(N,IA,IB,CRIT,LEV,ICLASS,HVALS,IORDER,&
+              CRITVAL,HEIGHT)
+!      include 'sizesclu.dat'
+!      include 'COMMON.IOUNITS'
+      integer :: N,LEV
+!el      integer :: ICLASS(maxconf,maxconf-1)
+      integer :: ICLASS(N,N-1)
+      INTEGER :: IA(N),IB(N),HVALS(LEV),IORDER(LEV),&
+              HEIGHT(LEV)
+      REAL(kind=4) :: CRIT(N),CRITVAL(LEV)
+      integer :: I,J,LOC,LEVEL,ICL,ILEV,NCL,K
+!
+!  Pick out the clusters which the N objects belong to,
+!  at levels N-2, N-3, ... N-LEV+1 of the hierarchy.
+!  The clusters are identified by the lowest seq. no. of
+!  their members.
+!  There are 2, 3, ... LEV clusters, respectively, for the
+!  above levels of the hierarchy.
+!
+      HVALS(1)=1
+      HVALS(2)=IB(N-1)
+      LOC=3
+      DO 59 I=N-2,N-LEV,-1
+         DO 52 J=1,LOC-1
+            IF (IA(I).EQ.HVALS(J)) GOTO 54
+  52     CONTINUE
+         HVALS(LOC)=IA(I)
+         LOC=LOC+1
+  54     CONTINUE
+         DO 56 J=1,LOC-1
+            IF (IB(I).EQ.HVALS(J)) GOTO 58
+  56     CONTINUE
+         IF (LOC.GT.LEV) GOTO 58
+         HVALS(LOC)=IB(I)
+         LOC=LOC+1
+  58     CONTINUE
+  59  CONTINUE
+!
+      DO 400 LEVEL=N-LEV,N-2
+         DO 200 I=1,N
+            ICL=I
+            DO 100 ILEV=1,LEVEL
+  100       IF (IB(ILEV).EQ.ICL) ICL=IA(ILEV)
+            NCL=N-LEVEL
+            ICLASS(I,NCL-1)=ICL
+  200    CONTINUE
+  400  CONTINUE
+!
+      DO 120 I=1,N
+      DO 120 J=1,LEV-1
+      DO 110 K=2,LEV
+      IF (ICLASS(I,J).NE.HVALS(K)) GOTO 110
+         ICLASS(I,J)=K
+         GOTO 120
+  110 CONTINUE
+  120 CONTINUE
+!
+      WRITE (iout,450) (j,j=2,LEV)
+  450 FORMAT(4X,' SEQ NOS',8(i2,'CL'),10000(i3,'CL'))
+      WRITE (iout,470) (' ---',j=2,LEV)
+  470 FORMAT(4X,' -------',10000a4)
+      DO 500 I=1,N
+      WRITE (iout,600) I,(ICLASS(I,J),J=1,LEV-1) 
+  600 FORMAT(I11,8I4,10000i5)                    
+  500 CONTINUE
+!
+!  Determine an ordering of the LEV clusters (at level LEV-1)
+!  for later representation of the dendrogram.
+!  These are stored in IORDER.
+!  Determine the associated ordering of the criterion values
+!  for the vertical lines in the dendrogram.
+!  The ordinal values of these criterion values may be used in
+!  preference, and these are stored in HEIGHT.
+!  Finally, note that the LEV clusters are renamed so that they
+!  have seq. nos. 1 to LEV.
+!
+      IORDER(1)=IA(N-1)
+      IORDER(2)=IB(N-1)
+      CRITVAL(1)=0.0
+      CRITVAL(2)=CRIT(N-1)
+      HEIGHT(1)=LEV
+      HEIGHT(2)=LEV-1
+      LOC=2
+      DO 700 I=N-2,N-LEV+1,-1
+         DO 650 J=1,LOC
+            IF (IA(I).EQ.IORDER(J)) THEN
+!              Shift rightwards and insert IB(I) beside IORDER(J):
+               DO 630 K=LOC+1,J+1,-1
+                  IORDER(K)=IORDER(K-1)
+                  CRITVAL(K)=CRITVAL(K-1)
+                  HEIGHT(K)=HEIGHT(K-1)
+  630          CONTINUE
+               IORDER(J+1)=IB(I)
+                CRITVAL(J+1)=CRIT(I)
+                HEIGHT(J+1)=I-(N-LEV)
+               LOC=LOC+1
+            ENDIF
+  650   CONTINUE
+  700 CONTINUE
+      DO 705 I=1,LEV
+         DO 703 J=1,LEV
+            IF (HVALS(I).EQ.IORDER(J)) THEN
+               IORDER(J)=I
+               GOTO 705
+            ENDIF
+  703    CONTINUE
+  705 CONTINUE
+!
+      RETURN
+      END SUBROUTINE HCASS
+!-----------------------------------------------------------------------------
+!+++++++++++++++++++++++++++++++++++++++++++++++++C
+!                                                 C
+!  Construct a DENDROGRAM of the top 8 levels of  C
+!  a HIERARCHIC CLUSTERING.                       C
+!                                                 C
+!  Parameters:                                    C
+!                                                 C
+!  IORDER, HEIGHT, CRITVAL: vectors of length LEV C
+!          defining the dendrogram.               C
+!          These are: the ordering of objects     C
+!          along the bottom of the dendrogram     C
+!          (IORDER); the height of the vertical   C
+!          above each object, in ordinal values   C
+!          (HEIGHT); and in real values (CRITVAL).C
+!                                                 C
+!  NOTE: these vectors MUST have been set up with C
+!        LEV = 9 in the prior call to routine     C
+!        HCASS.
+!                                                 C
+!  F. Murtagh, ESA/ESO/STECF, Garching, Feb. 1986.C
+!                                                 C 
+!-------------------------------------------------C
+      SUBROUTINE HCDEN(LEV,IORDER,HEIGHT,CRITVAL)
+!      include 'COMMON.IOUNITS'
+      integer :: LEV
+      CHARACTER(len=80) :: LINE
+      INTEGER :: IORDER(LEV),HEIGHT(LEV)
+      REAL(kind=4) :: CRITVAL(LEV)
+!      INTEGER OUT(3*LEV,3*LEV)
+!      INTEGER UP,ACROSS,BLANK
+      CHARACTER(len=1) :: OUT(3*LEV,3*LEV)
+      CHARACTER(len=1) :: UP,ACROSS,BLANK
+      DATA UP,ACROSS,BLANK /'|','-',' '/
+      integer :: I,I2,J,J2,K,I3,L,IC,IDUM
+!
+!
+      DO I=1,3*LEV
+        DO J=1,3*LEV
+          OUT(I,J)=BLANK
+        ENDDO
+      ENDDO
+!
+!
+      DO I=3,3*LEV,3
+         I2=I/3
+!
+         J2=3*LEV+1-3*HEIGHT(I2)
+         DO J=3*LEV,J2,-1
+            OUT(J,I)=UP
+         ENDDO
+!
+         DO K=I,3,-1
+            I3=INT((K+2)/3)
+            IF ( (3*LEV+1-HEIGHT(I3)*3).LT.J2) GOTO 100
+            OUT(J2,K)=ACROSS
+         ENDDO
+  100    CONTINUE
+!
+      ENDDO
+!
+!
+      IC=3
+      DO I=1,3*LEV
+      IF (I.EQ.IC+1) THEN
+                   IDUM=IC/3
+                   IDUM=LEV-IDUM
+                   DO L=1,LEV
+                      IF (HEIGHT(L).EQ.IDUM) GOTO 190
+                   ENDDO
+  190              IDUM=L
+                   WRITE(iout,200) CRITVAL(IDUM),(OUT(I,J),J=1,3*LEV)
+                   IC=IC+3
+                   ELSE
+                   LINE = ' '
+                   WRITE(iout,210) (OUT(I,J),J=1,3*LEV)
+      ENDIF
+  200 FORMAT(1H ,8X,F12.2,4X,27000A1)
+  210 FORMAT(1H ,24X,27000A1)
+      ENDDO
+      WRITE(iout,250)
+      WRITE(iout,220)(IORDER(J),J=1,LEV)
+      WRITE(iout,250)
+  220 FORMAT(1H ,24X,9000I3)
+      WRITE(iout,230) LEV
+  230 FORMAT(1H ,13X,'CRITERION        CLUSTERS 1 TO ',i3)
+      WRITE(iout,240) LEV-1
+  240 FORMAT(1H ,13X,'VALUES.      (TOP ',i3,' LEVELS OF HIERARCHY).')
+  250 FORMAT(/)
+!
+!
+      RETURN
+      END SUBROUTINE HCDEN
+!-----------------------------------------------------------------------------
+      end module hc_
+!-----------------------------------------------------------------------------
+!-----------------------------------------------------------------------------