#include "KLCalibration.h"
/// Constrained least-squares (Lagrange multiplier) fit of a KL candidate that
/// was reconstructed from nPi0 pi0's, i.e. 2*nPi0 photons.
///
/// Fit parameters
///   a : (x, y, E) of every photon, 3 entries per photon; the photon z is
///       kept fixed.  The covariance Va0 is diagonal, built from
///       sigmaX()^2, sigmaY()^2 and sigmaE()^2 of each photon.
///   v : the KL decay vertex (vx, vy, vz), treated as unmeasured.
///
/// Constraints H(a,v) = 0   (nPi0 + 3 rows)
///   rows 0 .. nPi0-1 : (E1+E2)^2 - |p1+p2|^2 - MASS_PI0^2   for each pi0
///   row  nPi0        : E_KL^2 - |p_KL|^2 - MASS_KL^2
///   row  nPi0+1      : sum_i E_i * (x_i - vx)
///   row  nPi0+2      : sum_i E_i * (y_i - vy)
///
/// The outer loop re-linearises around the updated photon parameters; the
/// inner loop iterates the vertex for fixed photon parameters.  Both loops
/// stop after MAX_ITERATIONS passes or as soon as chi2 stops improving, in
/// which case the best state seen so far is restored.
///
/// @param KL     candidate to fit; on success its mass, energy, momentum and
///               vertex, and those of its pi0's and photons, are overwritten
///               with the fitted values.
/// @param klPos  on success, set to the fitted KL vertex.
/// @param chi2   on success, set to the chi2 of the fit; left untouched when
///               the fit fails.
/// @return the chi2 on success.  When no iteration was ever accepted:
///         -1 if D*Va0*D^T could not be LU-decomposed, -2 if E^T*VD*E could
///         not be LU-decomposed, -3 otherwise.
Double_t Calibrate( Klong& KL, TVector3& klPos, Double_t& chi2 ){

  const Int_t nPi0   = KL.pi0().size();
  const Int_t nGamma = nPi0*2;   // two photons per pi0
  const Int_t nPar   = nGamma*3; // (x, y, E) per photon
  const Int_t nCon   = nPi0+3;   // pi0 masses + KL mass + centre of energy x,y

  Int_t    error_flag   = 0;
  Int_t    fit_updated  = 0;
  Double_t chisq_keep   = HUGE;
  Double_t chisq_keep_v = HUGE;
  Double_t chisq        = -1;

  // Per-photon working quantities.
  TVectorD px(nGamma);
  TVectorD py(nGamma);
  TVectorD pz(nGamma);
  TVectorD vx(nGamma);
  TVectorD vy(nGamma);
  TVectorD vz(nGamma);
  TVectorD R(nGamma);   // distance from the KL vertex to the photon position
  TVectorD gE(nGamma);
  TVectorD sx(nGamma);
  TVectorD sy(nGamma);
  TVectorD sE(nGamma);

  Double_t klE = 0;
  TVectorD klV(3);
  TVectorD klP(3);
  klV(0) = KL.vx();
  klV(1) = KL.vy();
  klV(2) = KL.vz();

  // Read the measured photon positions, energies and resolutions.  Momenta
  // are always recomputed from position, vertex and energy before being used.
  for( Int_t ip = 0; ip < nPi0; ip++ ){
    const Int_t g1 = ip*2;
    const Int_t g2 = ip*2+1;

    vx(g1) = KL.pi0()[ip].g1().x();
    vx(g2) = KL.pi0()[ip].g2().x();
    vy(g1) = KL.pi0()[ip].g1().y();
    vy(g2) = KL.pi0()[ip].g2().y();
    vz(g1) = KL.pi0()[ip].g1().z();
    vz(g2) = KL.pi0()[ip].g2().z();

    gE(g1) = KL.pi0()[ip].g1().e();
    gE(g2) = KL.pi0()[ip].g2().e();

    sx(g1) = KL.pi0()[ip].g1().sigmaX();
    sx(g2) = KL.pi0()[ip].g2().sigmaX();
    sy(g1) = KL.pi0()[ip].g1().sigmaY();
    sy(g2) = KL.pi0()[ip].g2().sigmaY();
    sE(g1) = KL.pi0()[ip].g1().sigmaE();
    sE(g2) = KL.pi0()[ip].g2().sigmaE();
  }

  // Parameter vectors and measurement covariance.  They are sized from the
  // number of photons (18 for three pi0's) so that they always match D below.
  TMatrixD    a0(nPar,1);  // measured photon parameters
  TMatrixD    a(nPar,1);   // current fitted photon parameters
  TMatrixD    v0(3,1);     // vertex at the current linearisation point
  TMatrixD    v(3,1);      // current fitted vertex
  TMatrixDSym Va0(nPar);   // diagonal covariance of a0

  for( Int_t ig = 0; ig < nGamma; ig++ ){
    a0(ig*3  ,0) = vx(ig);
    a0(ig*3+1,0) = vy(ig);
    a0(ig*3+2,0) = gE(ig);

    Va0(ig*3  , ig*3  ) = TMath::Power(sx(ig),2);
    Va0(ig*3+1, ig*3+1) = TMath::Power(sy(ig),2);
    Va0(ig*3+2, ig*3+2) = TMath::Power(sE(ig),2);
  }
  v0(0,0) = KL.vx();
  v0(1,0) = KL.vy();
  v0(2,0) = KL.vz();

  a = a0;
  v = v0;

  TMatrixD D(nCon, nPar);     // dH/da
  TMatrixD E(nCon, 3);        // dH/dv
  TMatrixD DT(nPar, nCon);
  TMatrixD ET(3, nCon);
  TMatrixD d(nCon, 1);        // H evaluated at the linearisation point
  TMatrixD da0(nPar, 1);
  TMatrixD dv0(3, 1);
  TMatrixD VD(nCon, nCon);    // (D Va0 D^T)^-1
  TMatrixD VE(3, 3);          // (E^T VD E)^-1, covariance of the vertex
  TMatrixD Lambda0(nCon, 1);
  TMatrixD Lambda(nCon, 1);
  TMatrixD Lambda0T(1, nCon);

  // Best state seen by the outer (global) loop.
  TMatrixD v_keep       = v;
  TMatrixD a_keep       = a;
  TMatrixD VE_keep      = VE;
  TMatrixD VD_keep      = VD;
  TMatrixD Lambda0_keep = Lambda0;
  TMatrixD E_keep       = E;
  TMatrixD D_keep       = D;

  // Best state seen by the inner (vertex) loop.
  TMatrixD v_keep_v       = v;
  TMatrixD VE_keep_v      = VE;
  TMatrixD VD_keep_v      = VD;
  TMatrixD Lambda0_keep_v = Lambda0;
  TMatrixD E_keep_v       = E;
  TMatrixD D_keep_v       = D;

  // Partial derivatives of the photon momenta and of R.  Every element is
  // rewritten at the start of each inner iteration, so one allocation is
  // enough.
  TVectorD dpx_dx(nGamma);
  TVectorD dpx_dy(nGamma);
  TVectorD dpx_dE(nGamma);
  TVectorD dpy_dx(nGamma);
  TVectorD dpy_dy(nGamma);
  TVectorD dpy_dE(nGamma);
  TVectorD dpz_dx(nGamma);
  TVectorD dpz_dy(nGamma);
  TVectorD dpz_dE(nGamma);
  TVectorD dR_dx(nGamma);
  TVectorD dR_dy(nGamma);
  TVectorD dR_dE(nGamma);

  TVectorD dpx_dvx(nGamma);
  TVectorD dpx_dvy(nGamma);
  TVectorD dpx_dvz(nGamma);
  TVectorD dpy_dvx(nGamma);
  TVectorD dpy_dvy(nGamma);
  TVectorD dpy_dvz(nGamma);
  TVectorD dpz_dvx(nGamma);
  TVectorD dpz_dvy(nGamma);
  TVectorD dpz_dvz(nGamma);
  TVectorD dR_dvx(nGamma);
  TVectorD dR_dvy(nGamma);
  TVectorD dR_dvz(nGamma);

  //////////////////////////////////////////////////
  /// Start chi2 minimization loop
  //////////////////////////////////////////////////
  for( Int_t it = 0; it < MAX_ITERATIONS; it++ ){
    chisq_keep_v = HUGE;
    for( Int_t it_v = 0; it_v < MAX_ITERATIONS; it_v++ ){
      v0 = v;
      error_flag = 0;

      Double_t dklpx_dvx = 0, dklpy_dvx = 0, dklpz_dvx = 0;
      Double_t dklpx_dvy = 0, dklpy_dvy = 0, dklpz_dvy = 0;
      Double_t dklpx_dvz = 0, dklpy_dvz = 0, dklpz_dvz = 0;
      klP(0) = 0.;
      klP(1) = 0.;
      klP(2) = 0.;
      klE    = 0.;

      // Photon momenta p = E * (r - v)/|r - v| and their derivatives.
      for( Int_t ig = 0; ig < nGamma; ig++ ){
        const Double_t dx = vx(ig) - klV(0);
        const Double_t dy = vy(ig) - klV(1);
        const Double_t dz = vz(ig) - klV(2);

        R(ig)  = TMath::Sqrt( TMath::Power(dx,2) +
                              TMath::Power(dy,2) +
                              TMath::Power(dz,2) );
        px(ig) = dx/R(ig)*gE(ig);
        py(ig) = dy/R(ig)*gE(ig);
        pz(ig) = dz/R(ig)*gE(ig);

        klP(0) += px(ig);
        klP(1) += py(ig);
        klP(2) += pz(ig);
        klE    += gE(ig);

        dR_dx(ig) = dx/R(ig);
        dR_dy(ig) = dy/R(ig);
        dR_dE(ig) = 0;

        dpx_dx(ig) = (gE(ig) - px(ig)*dR_dx(ig))/R(ig);
        dpx_dy(ig) = (       - px(ig)*dR_dy(ig))/R(ig); // d/dy goes through dR/dy
        dpx_dE(ig) = dx/R(ig);

        dpy_dx(ig) = (       - py(ig)*dR_dx(ig))/R(ig);
        dpy_dy(ig) = (gE(ig) - py(ig)*dR_dy(ig))/R(ig);
        dpy_dE(ig) = dy/R(ig);

        dpz_dx(ig) = (       - pz(ig)*dR_dx(ig))/R(ig);
        dpz_dy(ig) = (       - pz(ig)*dR_dy(ig))/R(ig);
        dpz_dE(ig) = dz/R(ig);                          // pz is linear in E, like px and py

        dR_dvx(ig) = -dx / R(ig);
        dR_dvy(ig) = -dy / R(ig);
        dR_dvz(ig) = -dz / R(ig);

        dpx_dvx(ig) = (-gE(ig) - px(ig)*dR_dvx(ig))/R(ig);
        dpx_dvy(ig) = (        - px(ig)*dR_dvy(ig))/R(ig);
        dpx_dvz(ig) = (        - px(ig)*dR_dvz(ig))/R(ig);
        dpy_dvx(ig) = (        - py(ig)*dR_dvx(ig))/R(ig);
        dpy_dvy(ig) = (-gE(ig) - py(ig)*dR_dvy(ig))/R(ig);
        dpy_dvz(ig) = (        - py(ig)*dR_dvz(ig))/R(ig);
        dpz_dvx(ig) = (        - pz(ig)*dR_dvx(ig))/R(ig);
        dpz_dvy(ig) = (        - pz(ig)*dR_dvy(ig))/R(ig);
        dpz_dvz(ig) = (-gE(ig) - pz(ig)*dR_dvz(ig))/R(ig);

        dklpx_dvx += dpx_dvx(ig);
        dklpx_dvy += dpx_dvy(ig);
        dklpx_dvz += dpx_dvz(ig);
        dklpy_dvx += dpy_dvx(ig);
        dklpy_dvy += dpy_dvy(ig);
        dklpy_dvz += dpy_dvz(ig);
        dklpz_dvx += dpz_dvx(ig);
        dklpz_dvy += dpz_dvy(ig);
        dklpz_dvz += dpz_dvz(ig);
      }

      // Constraint values H at the current linearisation point.
      for( Int_t ip = 0; ip < nPi0; ip++ ){
        d(ip,0)
          = TMath::Power( gE(ip*2) + gE(ip*2+1), 2)
          - TMath::Power( px(ip*2) + px(ip*2+1), 2)
          - TMath::Power( py(ip*2) + py(ip*2+1), 2)
          - TMath::Power( pz(ip*2) + pz(ip*2+1), 2)
          - MASS_PI0*MASS_PI0;
      }
      d(nPi0,0)   = klE*klE - klP(0)*klP(0) - klP(1)*klP(1) - klP(2)*klP(2) - MASS_KL*MASS_KL;
      d(nPi0+1,0) = -klV(0)*klE;
      d(nPi0+2,0) = -klV(1)*klE;
      for( Int_t ip = 0; ip < nPi0; ip++ ){
        d(nPi0+1,0) += gE(ip*2)*vx(ip*2) + gE(ip*2+1)*vx(ip*2+1);
        d(nPi0+2,0) += gE(ip*2)*vy(ip*2) + gE(ip*2+1)*vy(ip*2+1);
      }

      /// Reset core matrix D & E
      for( Int_t i = 0; i < nCon; i++ ){
        for( Int_t j = 0; j < nPar; j++ ){ D(i,j) = 0; }
        for( Int_t j = 0; j < 3;    j++ ){ E(i,j) = 0; }
      }

      /// Calculate core matrix D & E
      for( Int_t ip = 0; ip < nPi0; ip++ ){
        const Int_t g1 = ip*2;
        const Int_t g2 = ip*2+1;

        // Four-momentum of this pi0.
        const Double_t pi0E  = gE(g1) + gE(g2);
        const Double_t pi0Px = px(g1) + px(g2);
        const Double_t pi0Py = py(g1) + py(g2);
        const Double_t pi0Pz = pz(g1) + pz(g2);

        // Columns c, c+1, c+2 hold d/dx, d/dy, d/dE of photon g.
        for( Int_t k = 0; k < 2; k++ ){
          const Int_t g = ip*2 + k;
          const Int_t c = ip*6 + k*3;

          /// d(ip) = (E1+E2)^2 - |p1+p2|^2 - MPi0^2
          D(ip, c  ) =
            -2.*pi0Px*dpx_dx(g)
            -2.*pi0Py*dpy_dx(g)
            -2.*pi0Pz*dpz_dx(g);
          D(ip, c+1) =
            -2.*pi0Px*dpx_dy(g)
            -2.*pi0Py*dpy_dy(g)
            -2.*pi0Pz*dpz_dy(g);
          D(ip, c+2) =
            2.*pi0E
            -2.*pi0Px*dpx_dE(g)
            -2.*pi0Py*dpy_dE(g)
            -2.*pi0Pz*dpz_dE(g);

          /// d(nPi0) = klE^2 - |klP|^2 - MKL^2
          D(nPi0, c  ) =
            -2.*klP(0)*dpx_dx(g)
            -2.*klP(1)*dpy_dx(g)
            -2.*klP(2)*dpz_dx(g);
          D(nPi0, c+1) =
            -2.*klP(0)*dpx_dy(g)
            -2.*klP(1)*dpy_dy(g)
            -2.*klP(2)*dpz_dy(g);
          D(nPi0, c+2) =
            2.*klE
            -2.*klP(0)*dpx_dE(g)
            -2.*klP(1)*dpy_dE(g)
            -2.*klP(2)*dpz_dE(g);

          /// d(nPi0+1) = sum( E_i*x_i ) - klx*klE
          D(nPi0+1, c  ) = gE(g);
          D(nPi0+1, c+1) = 0.;
          D(nPi0+1, c+2) = vx(g) - klV(0);

          /// d(nPi0+2) = sum( E_i*y_i ) - kly*klE
          D(nPi0+2, c  ) = 0.;
          D(nPi0+2, c+1) = gE(g);
          D(nPi0+2, c+2) = vy(g) - klV(1);
        }

        /// Dd(ip)/Dklx
        E(ip, 0) =
          -2.*pi0Px*(dpx_dvx(g1) + dpx_dvx(g2))
          -2.*pi0Py*(dpy_dvx(g1) + dpy_dvx(g2))
          -2.*pi0Pz*(dpz_dvx(g1) + dpz_dvx(g2));
        /// Dd(ip)/Dkly
        E(ip, 1) =
          -2.*pi0Px*(dpx_dvy(g1) + dpx_dvy(g2))
          -2.*pi0Py*(dpy_dvy(g1) + dpy_dvy(g2))
          -2.*pi0Pz*(dpz_dvy(g1) + dpz_dvy(g2));
        /// Dd(ip)/Dklz
        E(ip, 2) =
          -2.*pi0Px*(dpx_dvz(g1) + dpx_dvz(g2))
          -2.*pi0Py*(dpy_dvz(g1) + dpy_dvz(g2))
          -2.*pi0Pz*(dpz_dvz(g1) + dpz_dvz(g2));
      }

      // Vertex derivatives of the KL mass and centre-of-energy constraints;
      // these do not depend on the individual pi0.
      E(nPi0, 0) =
        -2.*klP(0)*dklpx_dvx
        -2.*klP(1)*dklpy_dvx
        -2.*klP(2)*dklpz_dvx;
      E(nPi0, 1) =
        -2.*klP(0)*dklpx_dvy
        -2.*klP(1)*dklpy_dvy
        -2.*klP(2)*dklpz_dvy;
      E(nPi0, 2) =
        -2.*klP(0)*dklpx_dvz
        -2.*klP(1)*dklpy_dvz
        -2.*klP(2)*dklpz_dvz;

      E(nPi0+1,0) = -klE;
      E(nPi0+1,1) = 0.;
      E(nPi0+1,2) = 0.;

      E(nPi0+2,0) = 0.;
      E(nPi0+2,1) = -klE;
      E(nPi0+2,2) = 0.;

      da0 = a0 - a;
      dv0 = v0 - v;   // zero by construction, since v0 was just set to v

      DT.Transpose(D);
      TDecompLU l0((D*Va0*DT));
      if( !l0.Decompose() ){
        error_flag = -1;
      }else{
        l0.Invert(VD);
        ET.Transpose(E);
        TDecompLU l1((ET*VD*E));
        if( !l1.Decompose() ){
          error_flag = -2;
        }else{
          l1.Invert(VE);
          Lambda0 = VD*(D*da0 + d);
          Lambda0T.Transpose(Lambda0);
          chisq   = (Lambda0T * (D*da0 + E*dv0 + d))(0,0);
          v       = v - VE*ET*Lambda0;
        }
      }

      if( chisq < chisq_keep_v &&
          error_flag == 0 &&
          chisq >= 0. ){
        // Improvement: remember this state and keep iterating the vertex.
        chisq_keep_v   = chisq;
        v_keep_v       = v;
        VE_keep_v      = VE;
        VD_keep_v      = VD;
        Lambda0_keep_v = Lambda0;
        E_keep_v       = E;
        D_keep_v       = D;
        fit_updated    = 1;
      }else{
        // No improvement or numerical failure: roll back to the best state.
        chisq   = chisq_keep_v;
        v       = v_keep_v;
        VE      = VE_keep_v;
        VD      = VD_keep_v;
        Lambda0 = Lambda0_keep_v;
        E       = E_keep_v;
        D       = D_keep_v;
        break;
      }
      klV(0) = v(0,0);
      klV(1) = v(1,0);
      klV(2) = v(2,0);
    }//// End of KL vertex minimizing loop

    if( error_flag == 0 ){
      // D and E may just have been rolled back, so refresh their transposes
      // before they are combined with the restored VD, VE and Lambda0.
      DT.Transpose(D);
      ET.Transpose(E);
      Lambda = Lambda0 - VD * E * VE * ET * Lambda0;
      a      = a0 - Va0 * DT * Lambda;
    }
    if(( it == 0 || chisq < chisq_keep ) &&
       error_flag == 0 &&
       chisq >= 0. ){
      chisq_keep   = chisq;
      v_keep       = v;
      VE_keep      = VE;
      VD_keep      = VD;
      Lambda0_keep = Lambda0;
      E_keep       = E;
      D_keep       = D;
      a_keep       = a;
      fit_updated  = 1;
    }else{
      chisq   = chisq_keep;
      v       = v_keep;
      VE      = VE_keep;
      VD      = VD_keep;
      Lambda0 = Lambda0_keep;
      E       = E_keep;
      D       = D_keep;
      a       = a_keep;
      break;
    }

    // Move the linearisation point to the newly fitted photon parameters.
    for( Int_t ig = 0; ig < nGamma; ig++ ){
      vx(ig) = a(ig*3  ,0);
      vy(ig) = a(ig*3+1,0);
      gE(ig) = a(ig*3+2,0);
    }
  }/// End of global minimization loop

  // NOTE(review): fit_updated is also raised by the inner loop, so a fit whose
  // very first outer pass is rejected still counts as "updated" and returns
  // chisq == HUGE -- confirm whether callers rely on this.
  if( fit_updated == 0 ){
    std::cerr << Form("[Error] : Fit failed, return error code = %d.",error_flag)
              << std::endl;
    chisq = -1.;
    if( error_flag != 0 ){
      return static_cast<Double_t>( error_flag );
    }
    return -3.;
  }

  // The outer loop may have rolled v, D and E back to the kept state after
  // klV, DT and ET were last written; bring them back in sync before the
  // final quantities are derived from them.
  klV(0) = v(0,0);
  klV(1) = v(1,0);
  klV(2) = v(2,0);
  DT.Transpose(D);
  ET.Transpose(E);

  // Recompute the photon and KL four-momenta from the fitted parameters.
  klP(0) = 0;
  klP(1) = 0;
  klP(2) = 0;
  klE    = 0;

  for( Int_t ig = 0; ig < nGamma; ig++ ){
    R(ig) = TMath::Sqrt(TMath::Power(vx(ig) - klV(0),2) +
                        TMath::Power(vy(ig) - klV(1),2) +
                        TMath::Power(vz(ig) - klV(2),2));
    px(ig) = (vx(ig) - klV(0))/R(ig) * gE(ig);
    py(ig) = (vy(ig) - klV(1))/R(ig) * gE(ig);
    pz(ig) = (vz(ig) - klV(2))/R(ig) * gE(ig);

    klP(0) += px(ig);
    klP(1) += py(ig);
    klP(2) += pz(ig);
    klE    += gE(ig);
  }

  //// Final error matrix of the fitted photon parameters.  It is computed but
  //// not written back to KL; only VE(2,2) is exported below.
  TMatrixD VDtu = VD - VD * E * VE * ET * VD;
  TMatrixD Va   = Va0 - Va0 * DT * VDtu * D * Va0;

  KL.setMass( TMath::Sqrt( klE*klE - klP(0)*klP(0) - klP(1)*klP(1) - klP(2)*klP(2)) );
  KL.setEnergy( klE );
  KL.setP3( klP(0), klP(1), klP(2));
  KL.setVtx( klV(0), klV(1), klV(2));

  for( Int_t ip = 0; ip < nPi0; ip++ ){
    const Int_t g1 = ip*2;
    const Int_t g2 = ip*2+1;

    KL.pi0()[ip].setEnergy( gE(g1) + gE(g2));
    KL.pi0()[ip].setMass( TMath::Sqrt(TMath::Power(gE(g1) + gE(g2),2) -
                                      TMath::Power(px(g1) + px(g2),2) -
                                      TMath::Power(py(g1) + py(g2),2) -
                                      TMath::Power(pz(g1) + pz(g2),2)));

    KL.pi0()[ip].setVtx( klV(0), klV(1), klV(2));
    KL.pi0()[ip].setP3(px(g1)+px(g2),
                       py(g1)+py(g2),
                       pz(g1)+pz(g2));
    KL.pi0()[ip].setRecZ( klV(2));
    KL.pi0()[ip].setRecZsig2( VE(2,2) );

    KL.pi0()[ip].g1().setEnergy( gE(g1));
    KL.pi0()[ip].g1().setPos(vx(g1), vy(g1), vz(g1));
    KL.pi0()[ip].g1().setP3(px(g1), py(g1), pz(g1));

    KL.pi0()[ip].g2().setEnergy( gE(g2));
    KL.pi0()[ip].g2().setPos(vx(g2), vy(g2), vz(g2));
    KL.pi0()[ip].g2().setP3(px(g2), py(g2), pz(g2));
  }

  klPos.SetXYZ(KL.vx(), KL.vy(), KL.vz());
  chi2 = chisq;
  return chisq;
}
