Merge branch 'lipid' of mmka.chem.univ.gda.pl:unres into lipid

[unres.git] / source / unres / src_Eshel / sumsld.f
diff --git a/source/unres/src_Eshel/sumsld.f b/source/unres/src_Eshel/sumsld.f

new file mode 100644 (file)

index 0000000..1ce7b78
--- /dev/null
+++ b/source/unres/src_Eshel/sumsld.f
@@ -0,0 +1,1446 @@
+      subroutine sumsl(n, d, x, calcf, calcg, iv, liv, lv, v,
+     1                  uiparm, urparm, ufparm)
+c
+c  ***  minimize general unconstrained objective function using   ***
+c  ***  analytic gradient and hessian approx. from secant update  ***
+c
+      integer n, liv, lv
+      integer iv(liv), uiparm(1)
+      double precision d(n), x(n), v(lv), urparm(1)
+c     dimension v(71 + n*(n+15)/2), uiparm(*), urparm(*)
+      external calcf, calcg, ufparm
+c
+c  ***  purpose  ***
+c
+c        this routine interacts with subroutine  sumit  in an attempt
+c     to find an n-vector  x*  that minimizes the (unconstrained)
+c     objective function computed by  calcf.  (often the  x*  found is
+c     a local minimizer rather than a global one.)
+c
+c--------------------------  parameter usage  --------------------------
+c
+c n........ (input) the number of variables on which  f  depends, i.e.,
+c                  the number of components in  x.
+c d........ (input/output) a scale vector such that  d(i)*x(i),
+c                  i = 1,2,...,n,  are all in comparable units.
+c                  d can strongly affect the behavior of sumsl.
+c                  finding the best choice of d is generally a trial-
+c                  and-error process.  choosing d so that d(i)*x(i)
+c                  has about the same value for all i often works well.
+c                  the defaults provided by subroutine deflt (see i
+c                  below) require the caller to supply d.
+c x........ (input/output) before (initially) calling sumsl, the call-
+c                  er should set  x  to an initial guess at  x*.  when
+c                  sumsl returns,  x  contains the best point so far
+c                  found, i.e., the one that gives the least value so
+c                  far seen for  f(x).
+c calcf.... (input) a subroutine that, given x, computes f(x).  calcf
+c                  must be declared external in the calling program.
+c                  it is invoked by
+c                       call calcf(n, x, nf, f, uiparm, urparm, ufparm)
+c                  when calcf is called, nf is the invocation
+c                  count for calcf.  nf is included for possible use
+c                  with calcg.  if x is out of bounds (e.g., if it
+c                  would cause overflow in computing f(x)), then calcf
+c                  should set nf to 0.  this will cause a shorter step
+c                  to be attempted.  (if x is in bounds, then calcf
+c                  should not change nf.)  the other parameters are as
+c                  described above and below.  calcf should not change
+c                  n, p, or x.
+c calcg.... (input) a subroutine that, given x, computes g(x), the gra-
+c                  dient of f at x.  calcg must be declared external in
+c                  the calling program.  it is invoked by
+c                       call calcg(n, x, nf, g, uiparm, urparm, ufaprm)
+c                  when calcg is called, nf is the invocation
+c                  count for calcf at the time f(x) was evaluated.  the
+c                  x passed to calcg is usually the one passed to calcf
+c                  on either its most recent invocation or the one
+c                  prior to it.  if calcf saves intermediate results
+c                  for use by calcg, then it is possible to tell from
+c                  nf whether they are valid for the current x (or
+c                  which copy is valid if two copies are kept).  if g
+c                  cannot be computed at x, then calcg should set nf to
+c                  0.  in this case, sumsl will return with iv(1) = 65.
+c                  (if g can be computed at x, then calcg should not
+c                  changed nf.)  the other parameters to calcg are as
+c                  described above and below.  calcg should not change
+c                  n or x.
+c iv....... (input/output) an integer value array of length liv (see
+c                  below) that helps control the sumsl algorithm and
+c                  that is used to store various intermediate quanti-
+c                  ties.  of particular interest are the initialization/
+c                  return code iv(1) and the entries in iv that control
+c                  printing and limit the number of iterations and func-
+c                  tion evaluations.  see the section on iv input
+c                  values below.
+c liv...... (input) length of iv array.  must be at least 60.  if li
+c                  is too small, then sumsl returns with iv(1) = 15.
+c                  when sumsl returns, the smallest allowed value of
+c                  liv is stored in iv(lastiv) -- see the section on
+c                  iv output values below.  (this is intended for use
+c                  with extensions of sumsl that handle constraints.)
+c lv....... (input) length of v array.  must be at least 71+n*(n+15)/2.
+c                  (at least 77+n*(n+17)/2 for smsno, at least
+c                  78+n*(n+12) for humsl).  if lv is too small, then
+c                  sumsl returns with iv(1) = 16.  when sumsl returns,
+c                  the smallest allowed value of lv is stored in
+c                  iv(lastv) -- see the section on iv output values
+c                  below.
+c v........ (input/output) a floating-point value array of length l
+c                  (see below) that helps control the sumsl algorithm
+c                  and that is used to store various intermediate
+c                  quantities.  of particular interest are the entries
+c                  in v that limit the length of the first step
+c                  attempted (lmax0) and specify convergence tolerances
+c                  (afctol, lmaxs, rfctol, sctol, xctol, xftol).
+c uiparm... (input) user integer parameter array passed without change
+c                  to calcf and calcg.
+c urparm... (input) user floating-point parameter array passed without
+c                  change to calcf and calcg.
+c ufparm... (input) user external subroutine or function passed without
+c                  change to calcf and calcg.
+c
+c  ***  iv input values (from subroutine deflt)  ***
+c
+c iv(1)...  on input, iv(1) should have a value between 0 and 14......
+c             0 and 12 mean this is a fresh start.  0 means that
+c                  deflt(2, iv, liv, lv, v)
+c             is to be called to provide all default values to iv and
+c             v.  12 (the value that deflt assigns to iv(1)) means the
+c             caller has already called deflt and has possibly changed
+c             some iv and/or v entries to non-default values.
+c             13 means deflt has been called and that sumsl (and
+c             sumit) should only do their storage allocation.  that is,
+c             they should set the output components of iv that tell
+c             where various subarrays arrays of v begin, such as iv(g)
+c             (and, for humsl and humit only, iv(dtol)), and return.
+c             14 means that a storage has been allocated (by a call
+c             with iv(1) = 13) and that the algorithm should be
+c             started.  when called with iv(1) = 13, sumsl returns
+c             iv(1) = 14 unless liv or lv is too small (or n is not
+c             positive).  default = 12.
+c iv(inith).... iv(25) tells whether the hessian approximation h should
+c             be initialized.  1 (the default) means sumit should
+c             initialize h to the diagonal matrix whose i-th diagonal
+c             element is d(i)**2.  0 means the caller has supplied a
+c             cholesky factor  l  of the initial hessian approximation
+c             h = l*(l**t)  in v, starting at v(iv(lmat)) = v(iv(42))
+c             (and stored compactly by rows).  note that iv(lmat) may
+c             be initialized by calling sumsl with iv(1) = 13 (see
+c             the iv(1) discussion above).  default = 1.
+c iv(mxfcal)... iv(17) gives the maximum number of function evaluations
+c             (calls on calcf) allowed.  if this number does not suf-
+c             fice, then sumsl returns with iv(1) = 9.  default = 200.
+c iv(mxiter)... iv(18) gives the maximum number of iterations allowed.
+c             it also indirectly limits the number of gradient evalua-
+c             tions (calls on calcg) to iv(mxiter) + 1.  if iv(mxiter)
+c             iterations do not suffice, then sumsl returns with
+c             iv(1) = 10.  default = 150.
+c iv(outlev)... iv(19) controls the number and length of iteration sum-
+c             mary lines printed (by itsum).  iv(outlev) = 0 means do
+c             not print any summary lines.  otherwise, print a summary
+c             line after each abs(iv(outlev)) iterations.  if iv(outlev)
+c             is positive, then summary lines of length 78 (plus carri-
+c             age control) are printed, including the following...  the
+c             iteration and function evaluation counts, f = the current
+c             function value, relative difference in function values
+c             achieved by the latest step (i.e., reldf = (f0-v(f))/f01,
+c             where f01 is the maximum of abs(v(f)) and abs(v(f0)) and
+c             v(f0) is the function value from the previous itera-
+c             tion), the relative function reduction predicted for the
+c             step just taken (i.e., preldf = v(preduc) / f01, where
+c             v(preduc) is described below), the scaled relative change
+c             in x (see v(reldx) below), the step parameter for the
+c             step just taken (stppar = 0 means a full newton step,
+c             between 0 and 1 means a relaxed newton step, between 1
+c             and 2 means a double dogleg step, greater than 2 means
+c             a scaled down cauchy step -- see subroutine dbldog), the
+c             2-norm of the scale vector d times the step just taken
+c             (see v(dstnrm) below), and npreldf, i.e.,
+c             v(nreduc)/f01, where v(nreduc) is described below -- if
+c             npreldf is positive, then it is the relative function
+c             reduction predicted for a newton step (one with
+c             stppar = 0).  if npreldf is negative, then it is the
+c             negative of the relative function reduction predicted
+c             for a step computed with step bound v(lmaxs) for use in
+c             testing for singular convergence.
+c                  if iv(outlev) is negative, then lines of length 50
+c             are printed, including only the first 6 items listed
+c             above (through reldx).
+c             default = 1.
+c iv(parprt)... iv(20) = 1 means print any nondefault v values on a
+c             fresh start or any changed v values on a restart.
+c             iv(parprt) = 0 means skip this printing.  default = 1.
+c iv(prunit)... iv(21) is the output unit number on which all printing
+c             is done.  iv(prunit) = 0 means suppress all printing.
+c             default = standard output unit (unit 6 on most systems).
+c iv(solprt)... iv(22) = 1 means print out the value of x returned (as
+c             well as the gradient and the scale vector d).
+c             iv(solprt) = 0 means skip this printing.  default = 1.
+c iv(statpr)... iv(23) = 1 means print summary statistics upon return-
+c             ing.  these consist of the function value, the scaled
+c             relative change in x caused by the most recent step (see
+c             v(reldx) below), the number of function and gradient
+c             evaluations (calls on calcf and calcg), and the relative
+c             function reductions predicted for the last step taken and
+c             for a newton step (or perhaps a step bounded by v(lmaxs)
+c             -- see the descriptions of preldf and npreldf under
+c             iv(outlev) above).
+c             iv(statpr) = 0 means skip this printing.
+c             iv(statpr) = -1 means skip this printing as well as that
+c             of the one-line termination reason message.  default = 1.
+c iv(x0prt).... iv(24) = 1 means print the initial x and scale vector d
+c             (on a fresh start only).  iv(x0prt) = 0 means skip this
+c             printing.  default = 1.
+c
+c  ***  (selected) iv output values  ***
+c
+c iv(1)........ on output, iv(1) is a return code....
+c             3 = x-convergence.  the scaled relative difference (see
+c                  v(reldx)) between the current parameter vector x and
+c                  a locally optimal parameter vector is very likely at
+c                  most v(xctol).
+c             4 = relative function convergence.  the relative differ-
+c                  ence between the current function value and its lo-
+c                  cally optimal value is very likely at most v(rfctol).
+c             5 = both x- and relative function convergence (i.e., the
+c                  conditions for iv(1) = 3 and iv(1) = 4 both hold).
+c             6 = absolute function convergence.  the current function
+c                  value is at most v(afctol) in absolute value.
+c             7 = singular convergence.  the hessian near the current
+c                  iterate appears to be singular or nearly so, and a
+c                  step of length at most v(lmaxs) is unlikely to yield
+c                  a relative function decrease of more than v(sctol).
+c             8 = false convergence.  the iterates appear to be converg-
+c                  ing to a noncritical point.  this may mean that the
+c                  convergence tolerances (v(afctol), v(rfctol),
+c                  v(xctol)) are too small for the accuracy to which
+c                  the function and gradient are being computed, that
+c                  there is an error in computing the gradient, or that
+c                  the function or gradient is discontinuous near x.
+c             9 = function evaluation limit reached without other con-
+c                  vergence (see iv(mxfcal)).
+c            10 = iteration limit reached without other convergence
+c                  (see iv(mxiter)).
+c            11 = stopx returned .true. (external interrupt).  see the
+c                  usage notes below.
+c            14 = storage has been allocated (after a call with
+c                  iv(1) = 13).
+c            17 = restart attempted with n changed.
+c            18 = d has a negative component and iv(dtype) .le. 0.
+c            19...43 = v(iv(1)) is out of range.
+c            63 = f(x) cannot be computed at the initial x.
+c            64 = bad parameters passed to assess (which should not
+c                  occur).
+c            65 = the gradient could not be computed at x (see calcg
+c                  above).
+c            67 = bad first parameter to deflt.
+c            80 = iv(1) was out of range.
+c            81 = n is not positive.
+c iv(g)........ iv(28) is the starting subscript in v of the current
+c             gradient vector (the one corresponding to x).
+c iv(lastiv)... iv(44) is the least acceptable value of liv.  (it is
+c             only set if liv is at least 44.)
+c iv(lastv).... iv(45) is the least acceptable value of lv.  (it is
+c             only set if liv is large enough, at least iv(lastiv).)
+c iv(nfcall)... iv(6) is the number of calls so far made on calcf (i.e.,
+c             function evaluations).
+c iv(ngcall)... iv(30) is the number of gradient evaluations (calls on
+c             calcg).
+c iv(niter).... iv(31) is the number of iterations performed.
+c
+c  ***  (selected) v input values (from subroutine deflt)  ***
+c
+c v(bias)..... v(43) is the bias parameter used in subroutine dbldog --
+c             see that subroutine for details.  default = 0.8.
+c v(afctol)... v(31) is the absolute function convergence tolerance.
+c             if sumsl finds a point where the function value is less
+c             than v(afctol) in absolute value, and if sumsl does not
+c             return with iv(1) = 3, 4, or 5, then it returns with
+c             iv(1) = 6.  this test can be turned off by setting
+c             v(afctol) to zero.  default = max(10**-20, machep**2),
+c             where machep is the unit roundoff.
+c v(dinit).... v(38), if nonnegative, is the value to which the scale
+c             vector d is initialized.  default = -1.
+c v(lmax0).... v(35) gives the maximum 2-norm allowed for d times the
+c             very first step that sumsl attempts.  this parameter can
+c             markedly affect the performance of sumsl.
+c v(lmaxs).... v(36) is used in testing for singular convergence -- if
+c             the function reduction predicted for a step of length
+c             bounded by v(lmaxs) is at most v(sctol) * abs(f0), where
+c             f0  is the function value at the start of the current
+c             iteration, and if sumsl does not return with iv(1) = 3,
+c             4, 5, or 6, then it returns with iv(1) = 7.  default = 1.
+c v(rfctol)... v(32) is the relative function convergence tolerance.
+c             if the current model predicts a maximum possible function
+c             reduction (see v(nreduc)) of at most v(rfctol)*abs(f0)
+c             at the start of the current iteration, where  f0  is the
+c             then current function value, and if the last step attempt-
+c             ed achieved no more than twice the predicted function
+c             decrease, then sumsl returns with iv(1) = 4 (or 5).
+c             default = max(10**-10, machep**(2/3)), where machep is
+c             the unit roundoff.
+c v(sctol).... v(37) is the singular convergence tolerance -- see the
+c             description of v(lmaxs) above.
+c v(tuner1)... v(26) helps decide when to check for false convergence.
+c             this is done if the actual function decrease from the
+c             current step is no more than v(tuner1) times its predict-
+c             ed value.  default = 0.1.
+c v(xctol).... v(33) is the x-convergence tolerance.  if a newton step
+c             (see v(nreduc)) is tried that has v(reldx) .le. v(xctol)
+c             and if this step yields at most twice the predicted func-
+c             tion decrease, then sumsl returns with iv(1) = 3 (or 5).
+c             (see the description of v(reldx) below.)
+c             default = machep**0.5, where machep is the unit roundoff.
+c v(xftol).... v(34) is the false convergence tolerance.  if a step is
+c             tried that gives no more than v(tuner1) times the predict-
+c             ed function decrease and that has v(reldx) .le. v(xftol),
+c             and if sumsl does not return with iv(1) = 3, 4, 5, 6, or
+c             7, then it returns with iv(1) = 8.  (see the description
+c             of v(reldx) below.)  default = 100*machep, where
+c             machep is the unit roundoff.
+c v(*)........ deflt supplies to v a number of tuning constants, with
+c             which it should ordinarily be unnecessary to tinker.  see
+c             section 17 of version 2.2 of the nl2sol usage summary
+c             (i.e., the appendix to ref. 1) for details on v(i),
+c             i = decfac, incfac, phmnfc, phmxfc, rdfcmn, rdfcmx,
+c             tuner2, tuner3, tuner4, tuner5.
+c
+c  ***  (selected) v output values  ***
+c
+c v(dgnorm)... v(1) is the 2-norm of (diag(d)**-1)*g, where g is the
+c             most recently computed gradient.
+c v(dstnrm)... v(2) is the 2-norm of diag(d)*step, where step is the
+c             current step.
+c v(f)........ v(10) is the current function value.
+c v(f0)....... v(13) is the function value at the start of the current
+c             iteration.
+c v(nreduc)... v(6), if positive, is the maximum function reduction
+c             possible according to the current model, i.e., the func-
+c             tion reduction predicted for a newton step (i.e.,
+c             step = -h**-1 * g,  where  g  is the current gradient and
+c             h is the current hessian approximation).
+c                  if v(nreduc) is negative, then it is the negative of
+c             the function reduction predicted for a step computed with
+c             a step bound of v(lmaxs) for use in testing for singular
+c             convergence.
+c v(preduc)... v(7) is the function reduction predicted (by the current
+c             quadratic model) for the current step.  this (divided by
+c             v(f0)) is used in testing for relative function
+c             convergence.
+c v(reldx).... v(17) is the scaled relative change in x caused by the
+c             current step, computed as
+c                  max(abs(d(i)*(x(i)-x0(i)), 1 .le. i .le. p) /
+c                     max(d(i)*(abs(x(i))+abs(x0(i))), 1 .le. i .le. p),
+c             where x = x0 + step.
+c
+c-------------------------------  notes  -------------------------------
+c
+c  ***  algorithm notes  ***
+c
+c        this routine uses a hessian approximation computed from the
+c     bfgs update (see ref 3).  only a cholesky factor of the hessian
+c     approximation is stored, and this is updated using ideas from
+c     ref. 4.  steps are computed by the double dogleg scheme described
+c     in ref. 2.  the steps are assessed as in ref. 1.
+c
+c  ***  usage notes  ***
+c
+c        after a return with iv(1) .le. 11, it is possible to restart,
+c     i.e., to change some of the iv and v input values described above
+c     and continue the algorithm from the point where it was interrupt-
+c     ed.  iv(1) should not be changed, nor should any entries of i
+c     and v other than the input values (those supplied by deflt).
+c        those who do not wish to write a calcg which computes the
+c     gradient analytically should call smsno rather than sumsl.
+c     smsno uses finite differences to compute an approximate gradient.
+c        those who would prefer to provide f and g (the function and
+c     gradient) by reverse communication rather than by writing subrou-
+c     tines calcf and calcg may call on sumit directly.  see the com-
+c     ments at the beginning of sumit.
+c        those who use sumsl interactively may wish to supply their
+c     own stopx function, which should return .true. if the break key
+c     has been pressed since stopx was last invoked.  this makes it
+c     possible to externally interrupt sumsl (which will return with
+c     iv(1) = 11 if stopx returns .true.).
+c        storage for g is allocated at the end of v.  thus the caller
+c     may make v longer than specified above and may allow calcg to use
+c     elements of g beyond the first n as scratch storage.
+c
+c  ***  portability notes  ***
+c
+c        the sumsl distribution tape contains both single- and double-
+c     precision versions of the sumsl source code, so it should be un-
+c     necessary to change precisions.
+c        only the functions imdcon and rmdcon contain machine-dependent
+c     constants.  to change from one machine to another, it should
+c     suffice to change the (few) relevant lines in these functions.
+c        intrinsic functions are explicitly declared.  on certain com-
+c     puters (e.g. univac), it may be necessary to comment out these
+c     declarations.  so that this may be done automatically by a simple
+c     program, such declarations are preceded by a comment having c/+
+c     in columns 1-3 and blanks in columns 4-72 and are followed by
+c     a comment having c/ in columns 1 and 2 and blanks in columns 3-72.
+c        the sumsl source code is expressed in 1966 ansi standard
+c     fortran.  it may be converted to fortran 77 by commenting out all
+c     lines that fall between a line having c/6 in columns 1-3 and a
+c     line having c/7 in columns 1-3 and by removing (i.e., replacing
+c     by a blank) the c in column 1 of the lines that follow the c/7
+c     line and precede a line having c/ in columns 1-2 and blanks in
+c     columns 3-72.  these changes convert some data statements into
+c     parameter statements, convert some variables from real to
+c     character*4, and make the data statements that initialize these
+c     variables use character strings delimited by primes instead
+c     of hollerith constants.  (such variables and data statements
+c     appear only in modules itsum and parck.  parameter statements
+c     appear nearly everywhere.)  these changes also add save state-
+c     ments for variables given machine-dependent constants by rmdcon.
+c
+c  ***  references  ***
+c
+c 1.  dennis, j.e., gay, d.m., and welsch, r.e. (1981), algorithm 573 --
+c             an adaptive nonlinear least-squares algorithm, acm trans.
+c             math. software 7, pp. 369-383.
+c
+c 2.  dennis, j.e., and mei, h.h.w. (1979), two new unconstrained opti-
+c             mization algorithms which use function and gradient
+c             values, j. optim. theory applic. 28, pp. 453-482.
+c
+c 3.  dennis, j.e., and more, j.j. (1977), quasi-newton methods, motiva-
+c             tion and theory, siam rev. 19, pp. 46-89.
+c
+c 4.  goldfarb, d. (1976), factorized variable metric methods for uncon-
+c             strained optimization, math. comput. 30, pp. 796-811.
+c
+c  ***  general  ***
+c
+c     coded by david m. gay (winter 1980).  revised summer 1982.
+c     this subroutine was written in connection with research
+c     supported in part by the national science foundation under
+c     grants mcs-7600324, dcr75-10143, 76-14311dss, mcs76-11989,
+c     and mcs-7906671.
+c.
+c
+c----------------------------  declarations  ---------------------------
+c
+      external deflt, sumit
+c
+c deflt... supplies default iv and v input components.
+c sumit... reverse-communication routine that carries out sumsl algo-
+c             rithm.
+c
+      integer g1, iv1, nf
+      double precision f
+c
+c  ***  subscripts for iv   ***
+c
+      integer nextv, nfcall, nfgcal, g, toobig, vneed
+c
+c/6
+c     data nextv/47/, nfcall/6/, nfgcal/7/, g/28/, toobig/2/, vneed/4/
+c/7
+      parameter (nextv=47, nfcall=6, nfgcal=7, g=28, toobig=2, vneed=4)
+c/
+c
+c+++++++++++++++++++++++++++++++  body  ++++++++++++++++++++++++++++++++
+c
+      if (iv(1) .eq. 0) call deflt(2, iv, liv, lv, v)
+      iv1 = iv(1)
+      if (iv1 .eq. 12 .or. iv1 .eq. 13) iv(vneed) = iv(vneed) + n
+      if (iv1 .eq. 14) go to 10
+      if (iv1 .gt. 2 .and. iv1 .lt. 12) go to 10
+      g1 = 1
+      if (iv1 .eq. 12) iv(1) = 13
+      go to 20
+c
+ 10   g1 = iv(g)
+c
+ 20   call sumit(d, f, v(g1), iv, liv, lv, n, v, x)
+      if (iv(1) - 2) 30, 40, 50
+c
+ 30   nf = iv(nfcall)
+      call calcf(n, x, nf, f, uiparm, urparm, ufparm)
+      if (nf .le. 0) iv(toobig) = 1
+      go to 20
+c
+ 40   call calcg(n, x, iv(nfgcal), v(g1), uiparm, urparm, ufparm)
+      go to 20
+c
+ 50   if (iv(1) .ne. 14) go to 999
+c
+c  ***  storage allocation
+c
+      iv(g) = iv(nextv)
+      iv(nextv) = iv(g) + n
+      if (iv1 .ne. 13) go to 10
+c
+ 999  return
+c  ***  last card of sumsl follows  ***
+      end
+      subroutine sumit(d, fx, g, iv, liv, lv, n, v, x)
+c
+c  ***  carry out sumsl (unconstrained minimization) iterations, using
+c  ***  double-dogleg/bfgs steps.
+c
+c  ***  parameter declarations  ***
+c
+      integer liv, lv, n
+      integer iv(liv)
+      double precision d(n), fx, g(n), v(lv), x(n)
+c
+c--------------------------  parameter usage  --------------------------
+c
+c d.... scale vector.
+c fx... function value.
+c g.... gradient vector.
+c iv... integer value array.
+c liv.. length of iv (at least 60).
+c lv... length of v (at least 71 + n*(n+13)/2).
+c n.... number of variables (components in x and g).
+c v.... floating-point value array.
+c x.... vector of parameters to be optimized.
+c
+c  ***  discussion  ***
+c
+c        parameters iv, n, v, and x are the same as the corresponding
+c     ones to sumsl (which see), except that v can be shorter (since
+c     the part of v that sumsl uses for storing g is not needed).
+c     moreover, compared with sumsl, iv(1) may have the two additional
+c     output values 1 and 2, which are explained below, as is the use
+c     of iv(toobig) and iv(nfgcal).  the value iv(g), which is an
+c     output value from sumsl (and smsno), is not referenced by
+c     sumit or the subroutines it calls.
+c        fx and g need not have been initialized when sumit is called
+c     with iv(1) = 12, 13, or 14.
+c
+c iv(1) = 1 means the caller should set fx to f(x), the function value
+c             at x, and call sumit again, having changed none of the
+c             other parameters.  an exception occurs if f(x) cannot be
+c             (e.g. if overflow would occur), which may happen because
+c             of an oversized step.  in this case the caller should set
+c             iv(toobig) = iv(2) to 1, which will cause sumit to ig-
+c             nore fx and try a smaller step.  the parameter nf that
+c             sumsl passes to calcf (for possible use by calcg) is a
+c             copy of iv(nfcall) = iv(6).
+c iv(1) = 2 means the caller should set g to g(x), the gradient vector
+c             of f at x, and call sumit again, having changed none of
+c             the other parameters except possibly the scale vector d
+c             when iv(dtype) = 0.  the parameter nf that sumsl passes
+c             to calcg is iv(nfgcal) = iv(7).  if g(x) cannot be
+c             evaluated, then the caller may set iv(nfgcal) to 0, in
+c             which case sumit will return with iv(1) = 65.
+c.
+c  ***  general  ***
+c
+c     coded by david m. gay (december 1979).  revised sept. 1982.
+c     this subroutine was written in connection with research supported
+c     in part by the national science foundation under grants
+c     mcs-7600324 and mcs-7906671.
+c
+c        (see sumsl for references.)
+c
+c+++++++++++++++++++++++++++  declarations  ++++++++++++++++++++++++++++
+c
+c  ***  local variables  ***
+c
+      integer dg1, dummy, g01, i, k, l, lstgst, nwtst1, step1,
+     1        temp1, w, x01, z
+      double precision t
+c
+c     ***  constants  ***
+c
+      double precision half, negone, one, onep2, zero
+c
+c  ***  no intrinsic functions  ***
+c
+c  ***  external functions and subroutines  ***
+c
+      external assst, dbdog, deflt, dotprd, itsum, litvmu, livmul,
+     1         ltvmul, lupdat, lvmul, parck, reldst, stopx, vaxpy,
+     2         vcopy, vscopy, vvmulp, v2norm, wzbfgs
+      logical stopx
+      double precision dotprd, reldst, v2norm
+c
+c assst.... assesses candidate step.
+c dbdog.... computes double-dogleg (candidate) step.
+c deflt.... supplies default iv and v input components.
+c dotprd... returns inner product of two vectors.
+c itsum.... prints iteration summary and info on initial and final x.
+c litvmu... multiplies inverse transpose of lower triangle times vector.
+c livmul... multiplies inverse of lower triangle times vector.
+c ltvmul... multiplies transpose of lower triangle times vector.
+c lupdt.... updates cholesky factor of hessian approximation.
+c lvmul.... multiplies lower triangle times vector.
+c parck.... checks validity of input iv and v values.
+c reldst... computes v(reldx) = relative step size.
+c stopx.... returns .true. if the break key has been pressed.
+c vaxpy.... computes scalar times one vector plus another.
+c vcopy.... copies one vector to another.
+c vscopy... sets all elements of a vector to a scalar.
+c vvmulp... multiplies vector by vector raised to power (componentwise).
+c v2norm... returns the 2-norm of a vector.
+c wzbfgs... computes w and z for lupdat corresponding to bfgs update.
+c
+c  ***  subscripts for iv and v  ***
+c
+      integer afctol
+      integer cnvcod, dg, dgnorm, dinit, dstnrm, dst0, f, f0, fdif,
+     1        gthg, gtstep, g0, incfac, inith, irc, kagqt, lmat, lmax0,
+     2        lmaxs, mode, model, mxfcal, mxiter, nextv, nfcall, nfgcal,
+     3        ngcall, niter, nreduc, nwtstp, preduc, radfac, radinc,
+     4        radius, rad0, reldx, restor, step, stglim, stlstg, toobig,
+     5        tuner4, tuner5, vneed, xirc, x0
+c
+c  ***  iv subscript values  ***
+c
+c/6
+c     data cnvcod/55/, dg/37/, g0/48/, inith/25/, irc/29/, kagqt/33/,
+c    1     mode/35/, model/5/, mxfcal/17/, mxiter/18/, nfcall/6/,
+c    2     nfgcal/7/, ngcall/30/, niter/31/, nwtstp/34/, radinc/8/,
+c    3     restor/9/, step/40/, stglim/11/, stlstg/41/, toobig/2/,
+c    4     vneed/4/, xirc/13/, x0/43/
+c/7
+      parameter (cnvcod=55, dg=37, g0=48, inith=25, irc=29, kagqt=33,
+     1           mode=35, model=5, mxfcal=17, mxiter=18, nfcall=6,
+     2           nfgcal=7, ngcall=30, niter=31, nwtstp=34, radinc=8,
+     3           restor=9, step=40, stglim=11, stlstg=41, toobig=2,
+     4           vneed=4, xirc=13, x0=43)
+c/
+c
+c  ***  v subscript values  ***
+c
+c/6
+c     data afctol/31/
+c     data dgnorm/1/, dinit/38/, dstnrm/2/, dst0/3/, f/10/, f0/13/,
+c    1     fdif/11/, gthg/44/, gtstep/4/, incfac/23/, lmat/42/,
+c    2     lmax0/35/, lmaxs/36/, nextv/47/, nreduc/6/, preduc/7/,
+c    3     radfac/16/, radius/8/, rad0/9/, reldx/17/, tuner4/29/,
+c    4     tuner5/30/
+c/7
+      parameter (afctol=31)
+      parameter (dgnorm=1, dinit=38, dstnrm=2, dst0=3, f=10, f0=13,
+     1           fdif=11, gthg=44, gtstep=4, incfac=23, lmat=42,
+     2           lmax0=35, lmaxs=36, nextv=47, nreduc=6, preduc=7,
+     3           radfac=16, radius=8, rad0=9, reldx=17, tuner4=29,
+     4           tuner5=30)
+c/
+c
+c/6
+c     data half/0.5d+0/, negone/-1.d+0/, one/1.d+0/, onep2/1.2d+0/,
+c    1     zero/0.d+0/
+c/7
+      parameter (half=0.5d+0, negone=-1.d+0, one=1.d+0, onep2=1.2d+0,
+     1           zero=0.d+0)
+c/
+c
+c+++++++++++++++++++++++++++++++  body  ++++++++++++++++++++++++++++++++
+c
+C Following SAVE statement inserted.
+      save l
+      i = iv(1)
+      if (i .eq. 1) go to 50
+      if (i .eq. 2) go to 60
+c
+c  ***  check validity of iv and v input values  ***
+c
+      if (iv(1) .eq. 0) call deflt(2, iv, liv, lv, v)
+      if (iv(1) .eq. 12 .or. iv(1) .eq. 13)
+     1     iv(vneed) = iv(vneed) + n*(n+13)/2
+      call parck(2, d, iv, liv, lv, n, v)
+      i = iv(1) - 2
+      if (i .gt. 12) go to 999
+      go to (180, 180, 180, 180, 180, 180, 120, 90, 120, 10, 10, 20), i
+c
+c  ***  storage allocation  ***
+c
+10    l = iv(lmat)
+      iv(x0) = l + n*(n+1)/2
+      iv(step) = iv(x0) + n
+      iv(stlstg) = iv(step) + n
+      iv(g0) = iv(stlstg) + n
+      iv(nwtstp) = iv(g0) + n
+      iv(dg) = iv(nwtstp) + n
+      iv(nextv) = iv(dg) + n
+      if (iv(1) .ne. 13) go to 20
+         iv(1) = 14
+         go to 999
+c
+c  ***  initialization  ***
+c
+ 20   iv(niter) = 0
+      iv(nfcall) = 1
+      iv(ngcall) = 1
+      iv(nfgcal) = 1
+      iv(mode) = -1
+      iv(model) = 1
+      iv(stglim) = 1
+      iv(toobig) = 0
+      iv(cnvcod) = 0
+      iv(radinc) = 0
+      v(rad0) = zero
+      if (v(dinit) .ge. zero) call vscopy(n, d, v(dinit))
+      if (iv(inith) .ne. 1) go to 40
+c
+c     ***  set the initial hessian approximation to diag(d)**-2  ***
+c
+         l = iv(lmat)
+         call vscopy(n*(n+1)/2, v(l), zero)
+         k = l - 1
+         do 30 i = 1, n
+              k = k + i
+              t = d(i)
+              if (t .le. zero) t = one
+              v(k) = t
+ 30           continue
+c
+c  ***  compute initial function value  ***
+c
+ 40   iv(1) = 1
+      go to 999
+c
+ 50   v(f) = fx
+      if (iv(mode) .ge. 0) go to 180
+      iv(1) = 2
+      if (iv(toobig) .eq. 0) go to 999
+         iv(1) = 63
+         go to 300
+c
+c  ***  make sure gradient could be computed  ***
+c
+ 60   if (iv(nfgcal) .ne. 0) go to 70
+         iv(1) = 65
+         go to 300
+c
+ 70   dg1 = iv(dg)
+      call vvmulp(n, v(dg1), g, d, -1)
+      v(dgnorm) = v2norm(n, v(dg1))
+c
+c  ***  test norm of gradient  ***
+c
+      if (v(dgnorm) .gt. v(afctol)) go to 75
+      iv(irc) = 10
+      iv(cnvcod) = iv(irc) - 4
+c
+ 75   if (iv(cnvcod) .ne. 0) go to 290
+      if (iv(mode) .eq. 0) go to 250
+c
+c  ***  allow first step to have scaled 2-norm at most v(lmax0)  ***
+c
+      v(radius) = v(lmax0)
+c
+      iv(mode) = 0
+c
+c
+c-----------------------------  main loop  -----------------------------
+c
+c
+c  ***  print iteration summary, check iteration limit  ***
+c
+ 80   call itsum(d, g, iv, liv, lv, n, v, x)
+ 90   k = iv(niter)
+      if (k .lt. iv(mxiter)) go to 100
+         iv(1) = 10
+         go to 300
+c
+c  ***  update radius  ***
+c
+ 100  iv(niter) = k + 1
+      if(k.gt.0)v(radius) = v(radfac) * v(dstnrm)
+c
+c  ***  initialize for start of next iteration  ***
+c
+      g01 = iv(g0)
+      x01 = iv(x0)
+      v(f0) = v(f)
+      iv(irc) = 4
+      iv(kagqt) = -1
+c
+c     ***  copy x to x0, g to g0  ***
+c
+      call vcopy(n, v(x01), x)
+      call vcopy(n, v(g01), g)
+c
+c  ***  check stopx and function evaluation limit  ***
+c
+C AL 4/30/95
+      dummy=iv(nfcall)
+ 110  if (.not. stopx(dummy)) go to 130
+         iv(1) = 11
+         go to 140
+c
+c     ***  come here when restarting after func. eval. limit or stopx.
+c
+ 120  if (v(f) .ge. v(f0)) go to 130
+         v(radfac) = one
+         k = iv(niter)
+         go to 100
+c
+ 130  if (iv(nfcall) .lt. iv(mxfcal)) go to 150
+         iv(1) = 9
+ 140     if (v(f) .ge. v(f0)) go to 300
+c
+c        ***  in case of stopx or function evaluation limit with
+c        ***  improved v(f), evaluate the gradient at x.
+c
+              iv(cnvcod) = iv(1)
+              go to 240
+c
+c. . . . . . . . . . . . .  compute candidate step  . . . . . . . . . .
+c
+ 150  step1 = iv(step)
+      dg1 = iv(dg)
+      nwtst1 = iv(nwtstp)
+      if (iv(kagqt) .ge. 0) go to 160
+         l = iv(lmat)
+         call livmul(n, v(nwtst1), v(l), g)
+         v(nreduc) = half * dotprd(n, v(nwtst1), v(nwtst1))
+         call litvmu(n, v(nwtst1), v(l), v(nwtst1))
+         call vvmulp(n, v(step1), v(nwtst1), d, 1)
+         v(dst0) = v2norm(n, v(step1))
+         call vvmulp(n, v(dg1), v(dg1), d, -1)
+         call ltvmul(n, v(step1), v(l), v(dg1))
+         v(gthg) = v2norm(n, v(step1))
+         iv(kagqt) = 0
+ 160  call dbdog(v(dg1), lv, n, v(nwtst1), v(step1), v)
+      if (iv(irc) .eq. 6) go to 180
+c
+c  ***  check whether evaluating f(x0 + step) looks worthwhile  ***
+c
+      if (v(dstnrm) .le. zero) go to 180
+      if (iv(irc) .ne. 5) go to 170
+      if (v(radfac) .le. one) go to 170
+      if (v(preduc) .le. onep2 * v(fdif)) go to 180
+c
+c  ***  compute f(x0 + step)  ***
+c
+ 170  x01 = iv(x0)
+      step1 = iv(step)
+      call vaxpy(n, x, one, v(step1), v(x01))
+      iv(nfcall) = iv(nfcall) + 1
+      iv(1) = 1
+      iv(toobig) = 0
+      go to 999
+c
+c. . . . . . . . . . . . .  assess candidate step  . . . . . . . . . . .
+c
+ 180  x01 = iv(x0)
+      v(reldx) = reldst(n, d, x, v(x01))
+      call assst(iv, liv, lv, v)
+      step1 = iv(step)
+      lstgst = iv(stlstg)
+      if (iv(restor) .eq. 1) call vcopy(n, x, v(x01))
+      if (iv(restor) .eq. 2) call vcopy(n, v(lstgst), v(step1))
+      if (iv(restor) .ne. 3) go to 190
+         call vcopy(n, v(step1), v(lstgst))
+         call vaxpy(n, x, one, v(step1), v(x01))
+         v(reldx) = reldst(n, d, x, v(x01))
+c
+ 190  k = iv(irc)
+      go to (200,230,230,230,200,210,220,220,220,220,220,220,280,250), k
+c
+c     ***  recompute step with changed radius  ***
+c
+ 200     v(radius) = v(radfac) * v(dstnrm)
+         go to 110
+c
+c  ***  compute step of length v(lmaxs) for singular convergence test.
+c
+ 210  v(radius) = v(lmaxs)
+      go to 150
+c
+c  ***  convergence or false convergence  ***
+c
+ 220  iv(cnvcod) = k - 4
+      if (v(f) .ge. v(f0)) go to 290
+         if (iv(xirc) .eq. 14) go to 290
+              iv(xirc) = 14
+c
+c. . . . . . . . . . . .  process acceptable step  . . . . . . . . . . .
+c
+ 230  if (iv(irc) .ne. 3) go to 240
+         step1 = iv(step)
+         temp1 = iv(stlstg)
+c
+c     ***  set  temp1 = hessian * step  for use in gradient tests  ***
+c
+         l = iv(lmat)
+         call ltvmul(n, v(temp1), v(l), v(step1))
+         call lvmul(n, v(temp1), v(l), v(temp1))
+c
+c  ***  compute gradient  ***
+c
+ 240  iv(ngcall) = iv(ngcall) + 1
+      iv(1) = 2
+      go to 999
+c
+c  ***  initializations -- g0 = g - g0, etc.  ***
+c
+ 250  g01 = iv(g0)
+      call vaxpy(n, v(g01), negone, v(g01), g)
+      step1 = iv(step)
+      temp1 = iv(stlstg)
+      if (iv(irc) .ne. 3) go to 270
+c
+c  ***  set v(radfac) by gradient tests  ***
+c
+c     ***  set  temp1 = diag(d)**-1 * (hessian*step + (g(x0)-g(x)))  ***
+c
+         call vaxpy(n, v(temp1), negone, v(g01), v(temp1))
+         call vvmulp(n, v(temp1), v(temp1), d, -1)
+c
+c        ***  do gradient tests  ***
+c
+         if (v2norm(n, v(temp1)) .le. v(dgnorm) * v(tuner4))
+     1                  go to 260
+              if (dotprd(n, g, v(step1))
+     1                  .ge. v(gtstep) * v(tuner5))  go to 270
+ 260               v(radfac) = v(incfac)
+c
+c  ***  update h, loop  ***
+c
+ 270  w = iv(nwtstp)
+      z = iv(x0)
+      l = iv(lmat)
+      call wzbfgs(v(l), n, v(step1), v(w), v(g01), v(z))
+c
+c     ** use the n-vectors starting at v(step1) and v(g01) for scratch..
+      call lupdat(v(temp1), v(step1), v(l), v(g01), v(l), n, v(w), v(z))
+      iv(1) = 2
+      go to 80
+c
+c. . . . . . . . . . . . . .  misc. details  . . . . . . . . . . . . . .
+c
+c  ***  bad parameters to assess  ***
+c
+ 280  iv(1) = 64
+      go to 300
+c
+c  ***  print summary of final iteration and other requested items  ***
+c
+ 290  iv(1) = iv(cnvcod)
+      iv(cnvcod) = 0
+ 300  call itsum(d, g, iv, liv, lv, n, v, x)
+c
+ 999  return
+c
+c  ***  last line of sumit follows  ***
+      end
+      subroutine dbdog(dig, lv, n, nwtstp, step, v)
+c
+c  ***  compute double dogleg step  ***
+c
+c  ***  parameter declarations  ***
+c
+      integer lv, n
+      double precision dig(n), nwtstp(n), step(n), v(lv)
+c
+c  ***  purpose  ***
+c
+c        this subroutine computes a candidate step (for use in an uncon-
+c     strained minimization code) by the double dogleg algorithm of
+c     dennis and mei (ref. 1), which is a variation on powell*s dogleg
+c     scheme (ref. 2, p. 95).
+c
+c--------------------------  parameter usage  --------------------------
+c
+c    dig (input) diag(d)**-2 * g -- see algorithm notes.
+c      g (input) the current gradient vector.
+c     lv (input) length of v.
+c      n (input) number of components in  dig, g, nwtstp,  and  step.
+c nwtstp (input) negative newton step -- see algorithm notes.
+c   step (output) the computed step.
+c      v (i/o) values array, the following components of which are
+c             used here...
+c v(bias)   (input) bias for relaxed newton step, which is v(bias) of
+c             the way from the full newton to the fully relaxed newton
+c             step.  recommended value = 0.8 .
+c v(dgnorm) (input) 2-norm of diag(d)**-1 * g -- see algorithm notes.
+c v(dstnrm) (output) 2-norm of diag(d) * step, which is v(radius)
+c             unless v(stppar) = 0 -- see algorithm notes.
+c v(dst0) (input) 2-norm of diag(d) * nwtstp -- see algorithm notes.
+c v(grdfac) (output) the coefficient of  dig  in the step returned --
+c             step(i) = v(grdfac)*dig(i) + v(nwtfac)*nwtstp(i).
+c v(gthg)   (input) square-root of (dig**t) * (hessian) * dig -- see
+c             algorithm notes.
+c v(gtstep) (output) inner product between g and step.
+c v(nreduc) (output) function reduction predicted for the full newton
+c             step.
+c v(nwtfac) (output) the coefficient of  nwtstp  in the step returned --
+c             see v(grdfac) above.
+c v(preduc) (output) function reduction predicted for the step returned.
+c v(radius) (input) the trust region radius.  d times the step returned
+c             has 2-norm v(radius) unless v(stppar) = 0.
+c v(stppar) (output) code telling how step was computed... 0 means a
+c             full newton step.  between 0 and 1 means v(stppar) of the
+c             way from the newton to the relaxed newton step.  between
+c             1 and 2 means a true double dogleg step, v(stppar) - 1 of
+c             the way from the relaxed newton to the cauchy step.
+c             greater than 2 means 1 / (v(stppar) - 1) times the cauchy
+c             step.
+c
+c-------------------------------  notes  -------------------------------
+c
+c  ***  algorithm notes  ***
+c
+c        let  g  and  h  be the current gradient and hessian approxima-
+c     tion respectively and let d be the current scale vector.  this
+c     routine assumes dig = diag(d)**-2 * g  and  nwtstp = h**-1 * g.
+c     the step computed is the same one would get by replacing g and h
+c     by  diag(d)**-1 * g  and  diag(d)**-1 * h * diag(d)**-1,
+c     computing step, and translating step back to the original
+c     variables, i.e., premultiplying it by diag(d)**-1.
+c
+c  ***  references  ***
+c
+c 1.  dennis, j.e., and mei, h.h.w. (1979), two new unconstrained opti-
+c             mization algorithms which use function and gradient
+c             values, j. optim. theory applic. 28, pp. 453-482.
+c 2. powell, m.j.d. (1970), a hybrid method for non-linear equations,
+c             in numerical methods for non-linear equations, edited by
+c             p. rabinowitz, gordon and breach, london.
+c
+c  ***  general  ***
+c
+c     coded by david m. gay.
+c     this subroutine was written in connection with research supported
+c     by the national science foundation under grants mcs-7600324 and
+c     mcs-7906671.
+c
+c------------------------  external quantities  ------------------------
+c
+c  ***  functions and subroutines called  ***
+c
+      external dotprd, v2norm
+      double precision dotprd, v2norm
+c
+c dotprd... returns inner product of two vectors.
+c v2norm... returns 2-norm of a vector.
+c
+c  ***  intrinsic functions  ***
+c/+
+      double precision dsqrt
+c/
+c--------------------------  local variables  --------------------------
+c
+      integer i
+      double precision cfact, cnorm, ctrnwt, ghinvg, femnsq, gnorm,
+     1                 nwtnrm, relax, rlambd, t, t1, t2
+      double precision half, one, two, zero
+c
+c  ***  v subscripts  ***
+c
+      integer bias, dgnorm, dstnrm, dst0, grdfac, gthg, gtstep,
+     1        nreduc, nwtfac, preduc, radius, stppar
+c
+c  ***  data initializations  ***
+c
+c/6
+c     data half/0.5d+0/, one/1.d+0/, two/2.d+0/, zero/0.d+0/
+c/7
+      parameter (half=0.5d+0, one=1.d+0, two=2.d+0, zero=0.d+0)
+c/
+c
+c/6
+c     data bias/43/, dgnorm/1/, dstnrm/2/, dst0/3/, grdfac/45/,
+c    1     gthg/44/, gtstep/4/, nreduc/6/, nwtfac/46/, preduc/7/,
+c    2     radius/8/, stppar/5/
+c/7
+      parameter (bias=43, dgnorm=1, dstnrm=2, dst0=3, grdfac=45,
+     1           gthg=44, gtstep=4, nreduc=6, nwtfac=46, preduc=7,
+     2           radius=8, stppar=5)
+c/
+c
+c+++++++++++++++++++++++++++++++  body  ++++++++++++++++++++++++++++++++
+c
+      nwtnrm = v(dst0)
+      rlambd = one
+      if (nwtnrm .gt. zero) rlambd = v(radius) / nwtnrm
+      gnorm = v(dgnorm)
+      ghinvg = two * v(nreduc)
+      v(grdfac) = zero
+      v(nwtfac) = zero
+      if (rlambd .lt. one) go to 30
+c
+c        ***  the newton step is inside the trust region  ***
+c
+         v(stppar) = zero
+         v(dstnrm) = nwtnrm
+         v(gtstep) = -ghinvg
+         v(preduc) = v(nreduc)
+         v(nwtfac) = -one
+         do 20 i = 1, n
+ 20           step(i) = -nwtstp(i)
+         go to 999
+c
+ 30   v(dstnrm) = v(radius)
+      cfact = (gnorm / v(gthg))**2
+c     ***  cauchy step = -cfact * g.
+      cnorm = gnorm * cfact
+      relax = one - v(bias) * (one - gnorm*cnorm/ghinvg)
+      if (rlambd .lt. relax) go to 50
+c
+c        ***  step is between relaxed newton and full newton steps  ***
+c
+         v(stppar)  =  one  -  (rlambd - relax) / (one - relax)
+         t = -rlambd
+         v(gtstep) = t * ghinvg
+         v(preduc) = rlambd * (one - half*rlambd) * ghinvg
+         v(nwtfac) = t
+         do 40 i = 1, n
+ 40           step(i) = t * nwtstp(i)
+         go to 999
+c
+ 50   if (cnorm .lt. v(radius)) go to 70
+c
+c        ***  the cauchy step lies outside the trust region --
+c        ***  step = scaled cauchy step  ***
+c
+         t = -v(radius) / gnorm
+         v(grdfac) = t
+         v(stppar) = one  +  cnorm / v(radius)
+         v(gtstep) = -v(radius) * gnorm
+      v(preduc) = v(radius)*(gnorm - half*v(radius)*(v(gthg)/gnorm)**2)
+         do 60 i = 1, n
+ 60           step(i) = t * dig(i)
+         go to 999
+c
+c     ***  compute dogleg step between cauchy and relaxed newton  ***
+c     ***  femur = relaxed newton step minus cauchy step  ***
+c
+ 70   ctrnwt = cfact * relax * ghinvg / gnorm
+c     *** ctrnwt = inner prod. of cauchy and relaxed newton steps,
+c     *** scaled by gnorm**-1.
+      t1 = ctrnwt - gnorm*cfact**2
+c     ***  t1 = inner prod. of femur and cauchy step, scaled by
+c     ***  gnorm**-1.
+      t2 = v(radius)*(v(radius)/gnorm) - gnorm*cfact**2
+      t = relax * nwtnrm
+      femnsq = (t/gnorm)*t - ctrnwt - t1
+c     ***  femnsq = square of 2-norm of femur, scaled by gnorm**-1.
+      t = t2 / (t1 + dsqrt(t1**2 + femnsq*t2))
+c     ***  dogleg step  =  cauchy step  +  t * femur.
+      t1 = (t - one) * cfact
+      v(grdfac) = t1
+      t2 = -t * relax
+      v(nwtfac) = t2
+      v(stppar) = two - t
+      v(gtstep) = t1*gnorm**2 + t2*ghinvg
+      v(preduc) = -t1*gnorm * ((t2 + one)*gnorm)
+     1                 - t2 * (one + half*t2)*ghinvg
+     2                  - half * (v(gthg)*t1)**2
+      do 80 i = 1, n
+ 80      step(i) = t1*dig(i) + t2*nwtstp(i)
+c
+ 999  return
+c  ***  last line of dbdog follows  ***
+      end
+      subroutine ltvmul(n, x, l, y)
+c
+c  ***  compute  x = (l**t)*y, where  l  is an  n x n  lower
+c  ***  triangular matrix stored compactly by rows.  x and y may
+c  ***  occupy the same storage.  ***
+c
+      integer n
+cal   double precision x(n), l(1), y(n)
+      double precision x(n), l(n*(n+1)/2), y(n)
+c     dimension l(n*(n+1)/2)
+      integer i, ij, i0, j
+      double precision yi, zero
+c/6
+c     data zero/0.d+0/
+c/7
+      parameter (zero=0.d+0)
+c/
+c
+      i0 = 0
+      do 20 i = 1, n
+         yi = y(i)
+         x(i) = zero
+         do 10 j = 1, i
+              ij = i0 + j
+              x(j) = x(j) + yi*l(ij)
+ 10           continue
+         i0 = i0 + i
+ 20      continue
+ 999  return
+c  ***  last card of ltvmul follows  ***
+      end
+      subroutine lupdat(beta, gamma, l, lambda, lplus, n, w, z)
+c
+c  ***  compute lplus = secant update of l  ***
+c
+c  ***  parameter declarations  ***
+c
+      integer n
+cal   double precision beta(n), gamma(n), l(1), lambda(n), lplus(1),
+      double precision beta(n), gamma(n), l(n*(n+1)/2), lambda(n), 
+     1   lplus(n*(n+1)/2),w(n), z(n)
+c     dimension l(n*(n+1)/2), lplus(n*(n+1)/2)
+c
+c--------------------------  parameter usage  --------------------------
+c
+c   beta = scratch vector.
+c  gamma = scratch vector.
+c      l (input) lower triangular matrix, stored rowwise.
+c lambda = scratch vector.
+c  lplus (output) lower triangular matrix, stored rowwise, which may
+c             occupy the same storage as  l.
+c      n (input) length of vector parameters and order of matrices.
+c      w (input, destroyed on output) right singular vector of rank 1
+c             correction to  l.
+c      z (input, destroyed on output) left singular vector of rank 1
+c             correction to  l.
+c
+c-------------------------------  notes  -------------------------------
+c
+c  ***  application and usage restrictions  ***
+c
+c        this routine updates the cholesky factor  l  of a symmetric
+c     positive definite matrix to which a secant update is being
+c     applied -- it computes a cholesky factor  lplus  of
+c     l * (i + z*w**t) * (i + w*z**t) * l**t.  it is assumed that  w
+c     and  z  have been chosen so that the updated matrix is strictly
+c     positive definite.
+c
+c  ***  algorithm notes  ***
+c
+c        this code uses recurrence 3 of ref. 1 (with d(j) = 1 for all j)
+c     to compute  lplus  of the form  l * (i + z*w**t) * q,  where  q
+c     is an orthogonal matrix that makes the result lower triangular.
+c        lplus may have some negative diagonal elements.
+c
+c  ***  references  ***
+c
+c 1.  goldfarb, d. (1976), factorized variable metric methods for uncon-
+c             strained optimization, math. comput. 30, pp. 796-811.
+c
+c  ***  general  ***
+c
+c     coded by david m. gay (fall 1979).
+c     this subroutine was written in connection with research supported
+c     by the national science foundation under grants mcs-7600324 and
+c     mcs-7906671.
+c
+c------------------------  external quantities  ------------------------
+c
+c  ***  intrinsic functions  ***
+c/+
+      double precision dsqrt
+c/
+c--------------------------  local variables  --------------------------
+c
+      integer i, ij, j, jj, jp1, k, nm1, np1
+      double precision a, b, bj, eta, gj, lj, lij, ljj, nu, s, theta,
+     1                 wj, zj
+      double precision one, zero
+c
+c  ***  data initializations  ***
+c
+c/6
+c     data one/1.d+0/, zero/0.d+0/
+c/7
+      parameter (one=1.d+0, zero=0.d+0)
+c/
+c
+c+++++++++++++++++++++++++++++++  body  ++++++++++++++++++++++++++++++++
+c
+      nu = one
+      eta = zero
+      if (n .le. 1) go to 30
+      nm1 = n - 1
+c
+c  ***  temporarily store s(j) = sum over k = j+1 to n of w(k)**2 in
+c  ***  lambda(j).
+c
+      s = zero
+      do 10 i = 1, nm1
+         j = n - i
+         s = s + w(j+1)**2
+         lambda(j) = s
+ 10      continue
+c
+c  ***  compute lambda, gamma, and beta by goldfarb*s recurrence 3.
+c
+      do 20 j = 1, nm1
+         wj = w(j)
+         a = nu*z(j) - eta*wj
+         theta = one + a*wj
+         s = a*lambda(j)
+         lj = dsqrt(theta**2 + a*s)
+         if (theta .gt. zero) lj = -lj
+         lambda(j) = lj
+         b = theta*wj + s
+         gamma(j) = b * nu / lj
+         beta(j) = (a - b*eta) / lj
+         nu = -nu / lj
+         eta = -(eta + (a**2)/(theta - lj)) / lj
+ 20      continue
+ 30   lambda(n) = one + (nu*z(n) - eta*w(n))*w(n)
+c
+c  ***  update l, gradually overwriting  w  and  z  with  l*w  and  l*z.
+c
+      np1 = n + 1
+      jj = n * (n + 1) / 2
+      do 60 k = 1, n
+         j = np1 - k
+         lj = lambda(j)
+         ljj = l(jj)
+         lplus(jj) = lj * ljj
+         wj = w(j)
+         w(j) = ljj * wj
+         zj = z(j)
+         z(j) = ljj * zj
+         if (k .eq. 1) go to 50
+         bj = beta(j)
+         gj = gamma(j)
+         ij = jj + j
+         jp1 = j + 1
+         do 40 i = jp1, n
+              lij = l(ij)
+              lplus(ij) = lj*lij + bj*w(i) + gj*z(i)
+              w(i) = w(i) + lij*wj
+              z(i) = z(i) + lij*zj
+              ij = ij + i
+ 40           continue
+ 50      jj = jj - j
+ 60      continue
+c
+ 999  return
+c  ***  last card of lupdat follows  ***
+      end
+      subroutine lvmul(n, x, l, y)
+c
+c  ***  compute  x = l*y, where  l  is an  n x n  lower triangular
+c  ***  matrix stored compactly by rows.  x and y may occupy the same
+c  ***  storage.  ***
+c
+      integer n
+cal   double precision x(n), l(1), y(n)
+      double precision x(n), l(n*(n+1)/2), y(n)
+c     dimension l(n*(n+1)/2)
+      integer i, ii, ij, i0, j, np1
+      double precision t, zero
+c/6
+c     data zero/0.d+0/
+c/7
+      parameter (zero=0.d+0)
+c/
+c
+      np1 = n + 1
+      i0 = n*(n+1)/2
+      do 20 ii = 1, n
+         i = np1 - ii
+         i0 = i0 - i
+         t = zero
+         do 10 j = 1, i
+              ij = i0 + j
+              t = t + l(ij)*y(j)
+ 10           continue
+         x(i) = t
+ 20      continue
+ 999  return
+c  ***  last card of lvmul follows  ***
+      end
+      subroutine vvmulp(n, x, y, z, k)
+c
+c ***  set x(i) = y(i) * z(i)**k, 1 .le. i .le. n (for k = 1 or -1)  ***
+c
+      integer n, k
+      double precision x(n), y(n), z(n)
+      integer i
+c
+      if (k .ge. 0) go to 20
+      do 10 i = 1, n
+ 10      x(i) = y(i) / z(i)
+      go to 999
+c
+ 20   do 30 i = 1, n
+ 30      x(i) = y(i) * z(i)
+ 999  return
+c  ***  last card of vvmulp follows  ***
+      end
+      subroutine wzbfgs (l, n, s, w, y, z)
+c
+c  ***  compute  y  and  z  for  lupdat  corresponding to bfgs update.
+c
+      integer n
+cal   double precision l(1), s(n), w(n), y(n), z(n)
+      double precision l(n*(n+1)/2), s(n), w(n), y(n), z(n)
+c     dimension l(n*(n+1)/2)
+c
+c--------------------------  parameter usage  --------------------------
+c
+c l (i/o) cholesky factor of hessian, a lower triang. matrix stored
+c             compactly by rows.
+c n (input) order of  l  and length of  s,  w,  y,  z.
+c s (input) the step just taken.
+c w (output) right singular vector of rank 1 correction to l.
+c y (input) change in gradients corresponding to s.
+c z (output) left singular vector of rank 1 correction to l.
+c
+c-------------------------------  notes  -------------------------------
+c
+c  ***  algorithm notes  ***
+c
+c        when  s  is computed in certain ways, e.g. by  gqtstp  or
+c     dbldog,  it is possible to save n**2/2 operations since  (l**t)*s
+c     or  l*(l**t)*s is then known.
+c        if the bfgs update to l*(l**t) would reduce its determinant to
+c     less than eps times its old value, then this routine in effect
+c     replaces  y  by  theta*y + (1 - theta)*l*(l**t)*s,  where  theta
+c     (between 0 and 1) is chosen to make the reduction factor = eps.
+c
+c  ***  general  ***
+c
+c     coded by david m. gay (fall 1979).
+c     this subroutine was written in connection with research supported
+c     by the national science foundation under grants mcs-7600324 and
+c     mcs-7906671.
+c
+c------------------------  external quantities  ------------------------
+c
+c  ***  functions and subroutines called  ***
+c
+      external dotprd, livmul, ltvmul
+      double precision dotprd
+c dotprd returns inner product of two vectors.
+c livmul multiplies l**-1 times a vector.
+c ltvmul multiplies l**t times a vector.
+c
+c  ***  intrinsic functions  ***
+c/+
+      double precision dsqrt
+c/
+c--------------------------  local variables  --------------------------
+c
+      integer i
+      double precision cs, cy, eps, epsrt, one, shs, ys, theta
+c
+c  ***  data initializations  ***
+c
+c/6
+c     data eps/0.1d+0/, one/1.d+0/
+c/7
+      parameter (eps=0.1d+0, one=1.d+0)
+c/
+c
+c+++++++++++++++++++++++++++++++  body  ++++++++++++++++++++++++++++++++
+c
+      call ltvmul(n, w, l, s)
+      shs = dotprd(n, w, w)
+      ys = dotprd(n, y, s)
+      if (ys .ge. eps*shs) go to 10
+         theta = (one - eps) * shs / (shs - ys)
+         epsrt = dsqrt(eps)
+         cy = theta / (shs * epsrt)
+         cs = (one + (theta-one)/epsrt) / shs
+         go to 20
+ 10   cy = one / (dsqrt(ys) * dsqrt(shs))
+      cs = one / shs
+ 20   call livmul(n, z, l, y)
+      do 30 i = 1, n
+ 30      z(i) = cy * z(i)  -  cs * w(i)
+c
+ 999  return
+c  ***  last card of wzbfgs follows  ***
+      end