Source code for qig.duhamel

"""
Duhamel formula for Matrix Exponential Family derivatives.

For ρ = exp(H) where H = ∑ θ_a F_a - ψ(θ)I, the exact derivative is:

    ∂ρ/∂θ_a = ∫₀¹ exp(sH) (F_a - ⟨F_a⟩I) exp((1-s)H) ds

This is the Duhamel (or Dalecki-Krein exponential) formula. The SLD is just
the trapezoid rule approximation (average of s=0 and s=1 endpoints).

QUANTUM DERIVATIVE PRINCIPLE: This is the EXACT formula that respects
operator ordering and preserves Hermiticity.
"""

import numpy as np
from scipy.linalg import expm, eigh
from scipy.integrate import quad_vec



[docs]
def duhamel_derivative(
    rho: np.ndarray,
    H: np.ndarray, 
    F_centered: np.ndarray,
    n_points: int = 50
) -> np.ndarray:
    """
    Compute ∂ρ/∂θ using the Duhamel formula with numerical integration.
    
    ∂ρ/∂θ = ∫₀¹ exp(sH) F_centered exp((1-s)H) ds
    
    where F_centered = F - ⟨F⟩I.
    
    Parameters
    ----------
    rho : ndarray, shape (D, D)
        Density matrix
    H : ndarray, shape (D, D)  
        Hamiltonian H = ∑ θ_a F_a - ψ(θ)I
    F_centered : ndarray, shape (D, D)
        Centered operator F - ⟨F⟩I
    n_points : int
        Number of quadrature points for integration
        Default: 50 (gives ~5e-05 error for typical cases)
        
    Returns
    -------
    drho : ndarray, shape (D, D)
        Derivative ∂ρ/∂θ (exactly Hermitian)
        
    Notes
    -----
    - Uses trapezoid rule for integration
    - For n_points=2, recovers the SLD formula
    - Convergence (typical case): n=50→6e-05, n=100→1.5e-05, n=200→3.6e-06
    - For high precision, use n_points ≥ 200 or use theta_only method instead
    """
    D = rho.shape[0]
    
    # Trapezoid rule
    s_vals = np.linspace(0, 1, n_points)
    ds = s_vals[1] - s_vals[0]
    
    drho = np.zeros((D, D), dtype=complex)
    
    # Precompute matrix exponentials for efficiency
    exp_sH = {}
    exp_1msH = {}
    
    for s in s_vals:
        exp_sH[s] = expm(s * H)
        exp_1msH[s] = expm((1 - s) * H)
    
    # Trapezoid rule integration
    for i, s in enumerate(s_vals):
        integrand = exp_sH[s] @ F_centered @ exp_1msH[s]
        
        if i == 0 or i == len(s_vals) - 1:
            # Endpoints get weight 0.5
            drho += 0.5 * ds * integrand
        else:
            # Interior points get weight 1
            drho += ds * integrand
    
    return drho




[docs]
def duhamel_derivative_block(
    rho: np.ndarray,
    H: np.ndarray,
    F_centered: np.ndarray,
) -> np.ndarray:
    """
    Compute ∂ρ/∂θ using Higham's block-matrix identity (Fréchet derivative of exp).

    This evaluates the same Duhamel integral as `duhamel_derivative_spectral`:

        D exp_H[F_centered] = ∫₀¹ e^{(1-s)H} F_centered e^{sH} ds

    but without eigendecomposition or explicit quadrature. Instead, form the 2n×2n
    block matrix:

        B = [[H, F_centered],
             [0,        H]]

    Then:

        exp(B) = [[exp(H), D exp_H[F_centered]],
                  [0,      exp(H)]]

    so the (1,2) block is the desired derivative.

    Notes
    -----
    - Cost is one `expm` on a (2D)×(2D) matrix.
    - For Hermitian H and Hermitian F_centered, the exact result is Hermitian;
      we symmetrise to guard against tiny numerical asymmetry.

    References
    ----------
    - Higham, N. J. (2008). Functions of Matrices: Theory and Computation. SIAM.
      Chapter 10.
    - Al-Mohy, A. H., & Higham, N. J. (2009). Computing the Fréchet derivative of
      the matrix exponential, with an application to condition number estimation.
      SIAM J. Matrix Anal. Appl., 30(4), 1639–1657.
    """
    n = H.shape[0]
    if H.shape != (n, n) or F_centered.shape != (n, n):
        raise ValueError("H and F_centered must be square matrices of the same shape.")

    Z = np.zeros((n, n), dtype=complex)
    block = np.block([[H, F_centered], [Z, H]])
    exp_block = expm(block)
    drho = exp_block[:n, n:]
    return 0.5 * (drho + drho.conj().T)




[docs]
def expm_frechet_block_1(A: np.ndarray, E: np.ndarray) -> np.ndarray:
    """
    First Fréchet derivative of exp at A in direction E via 2×2 block trick.

    Returns D exp_A[E].
    """
    n = A.shape[0]
    if A.shape != (n, n) or E.shape != (n, n):
        raise ValueError("A and E must be square matrices of the same shape.")
    Z = np.zeros((n, n), dtype=complex)
    block = np.block([[A, E], [Z, A]])
    exp_block = expm(block)
    return exp_block[:n, n:]




[docs]
def expm_frechet_block_2(A: np.ndarray, E: np.ndarray, F: np.ndarray) -> np.ndarray:
    """
    Second Fréchet derivative of exp at A in directions (E, F) via 3×3 block trick.

    Constructs the 3n×3n block matrix:

        [[A, E, 0],
         [0, A, F],
         [0, 0, A]]

    Then (1,3) block of exp is D² exp_A[E, F].
    """
    n = A.shape[0]
    if A.shape != (n, n) or E.shape != (n, n) or F.shape != (n, n):
        raise ValueError("A, E, F must be square matrices of the same shape.")
    Z = np.zeros((n, n), dtype=complex)
    block = np.block([[A, E, Z], [Z, A, F], [Z, Z, A]])
    exp_block = expm(block)
    return exp_block[:n, 2 * n : 3 * n]




[docs]
def expm_frechet_block_3(
    A: np.ndarray, E: np.ndarray, F: np.ndarray, G: np.ndarray
) -> np.ndarray:
    """
    Third Fréchet derivative of exp at A in directions (E, F, G) via 4×4 block trick.

    Constructs the 4n×4n block matrix:

        [[A, E, 0, 0],
         [0, A, F, 0],
         [0, 0, A, G],
         [0, 0, 0, A]]

    Then (1,4) block of exp is D³ exp_A[E, F, G].
    """
    n = A.shape[0]
    if (
        A.shape != (n, n)
        or E.shape != (n, n)
        or F.shape != (n, n)
        or G.shape != (n, n)
    ):
        raise ValueError("A, E, F, G must be square matrices of the same shape.")
    Z = np.zeros((n, n), dtype=complex)
    block = np.block(
        [[A, E, Z, Z], [Z, A, F, Z], [Z, Z, A, G], [Z, Z, Z, A]]
    )
    exp_block = expm(block)
    return exp_block[:n, 3 * n : 4 * n]




[docs]
def duhamel_derivative_simpson(
    rho: np.ndarray,
    H: np.ndarray, 
    F_centered: np.ndarray,
    n_points: int = 51  # Must be odd for Simpson's rule
) -> np.ndarray:
    """
    Compute ∂ρ/∂θ using Duhamel formula with Simpson's rule.
    
    More accurate than trapezoid rule for smooth integrands.
    
    Parameters
    ----------
    rho : ndarray, shape (D, D)
        Density matrix
    H : ndarray, shape (D, D)
        Hamiltonian
    F_centered : ndarray, shape (D, D)
        Centered operator
    n_points : int, odd
        Number of quadrature points (must be odd)
        
    Returns
    -------
    drho : ndarray, shape (D, D)
        Derivative ∂ρ/∂θ
    """
    if n_points % 2 == 0:
        n_points += 1  # Ensure odd
    
    D = rho.shape[0]
    s_vals = np.linspace(0, 1, n_points)
    h = s_vals[1] - s_vals[0]
    
    drho = np.zeros((D, D), dtype=complex)
    
    # Precompute exponentials
    exp_sH = [expm(s * H) for s in s_vals]
    exp_1msH = [expm((1 - s) * H) for s in s_vals]
    
    # Simpson's rule: (h/3)[f0 + 4f1 + 2f2 + 4f3 + 2f4 + ... + 4fn-1 + fn]
    for i in range(n_points):
        integrand = exp_sH[i] @ F_centered @ exp_1msH[i]
        
        if i == 0 or i == n_points - 1:
            weight = h / 3
        elif i % 2 == 1:
            weight = 4 * h / 3
        else:
            weight = 2 * h / 3
        
        drho += weight * integrand
    
    return drho




[docs]
def duhamel_derivative_spectral(
    rho: np.ndarray,
    H: np.ndarray,
    F_centered: np.ndarray,
) -> np.ndarray:
    """
    Compute ∂ρ/∂θ using the Duhamel formula via the spectral representation of H.
    
    This evaluates the Fréchet derivative of the matrix exponential
        D exp_H[F_centered] = ∫₀¹ e^{(1-s)H} F_centered e^{sH} ds
    exactly (up to diagonalisation error), by working in the eigenbasis of H.
    
    In that basis we have, for H = U diag(λ) U†,
        (D exp_H[F_centered])_{ij} =
            { e^{λ_i} F_{ij}          if i = j,
              F_{ij} (e^{λ_i} - e^{λ_j}) / (λ_i - λ_j)  if i ≠ j }.
    
    Parameters
    ----------
    rho : ndarray, shape (D, D)
        Density matrix (unused, included for API symmetry with duhamel_derivative)
    H : ndarray, shape (D, D)
        Hamiltonian H = ∑ θ_a F_a - ψ(θ)I (Hermitian)
    F_centered : ndarray, shape (D, D)
        Centered operator F - ⟨F⟩I
    
    Returns
    -------
    drho : ndarray, shape (D, D)
        Derivative ∂ρ/∂θ (exact up to eigen-decomposition accuracy)
    """
    # Diagonalise H (Hermitian)
    evals, U = eigh(H)
    U_dag = U.conj().T

    # Transform F_centered into eigenbasis of H
    X_tilde = U_dag @ F_centered @ U

    # Build kernel K_ij = (e^{λ_i} - e^{λ_j}) / (λ_i - λ_j),
    # with degenerate / diagonal limit K_ij → e^{λ_i} when λ_i = λ_j.
    lam_i = evals[:, None]
    lam_j = evals[None, :]
    denom = lam_i - lam_j

    with np.errstate(divide="ignore", invalid="ignore"):
        K = (np.exp(lam_i) - np.exp(lam_j)) / denom

    # Handle degenerate eigenvalues (including the diagonal) by taking the limit
    # (e^{λ_i} - e^{λ_j}) / (λ_i - λ_j) → e^{λ_i} as λ_j → λ_i.
    degenerate = np.abs(denom) < 1e-12
    # Use average eigenvalue in the exponent so that when λ_i = λ_j we get e^{λ_i}.
    exp_avg = np.exp(0.5 * (lam_i + lam_j))
    K[degenerate] = exp_avg[degenerate]

    # Apply kernel elementwise in eigenbasis
    d_rho_tilde = K * X_tilde

    # Transform back to original basis
    drho = U @ d_rho_tilde @ U_dag
    return drho




[docs]
def compute_H_from_theta(operators: list, theta: np.ndarray) -> tuple:
    """
    Compute H = K - ψ(θ)I where K = ∑ θ_a F_a.
    
    For the exponential family ρ = exp(H), we have:
    - K = ∑ θ_a F_a (the "bare" Hamiltonian)
    - ψ(θ) = log Tr[exp(K)] (log partition function)
    - H = K - ψ(θ)I (normalized so exp(H) is normalized)
    
    Parameters
    ----------
    operators : list
        Basis operators {F_a}
    theta : ndarray
        Natural parameters
        
    Returns
    -------
    H : ndarray
        Hamiltonian H = K - ψ I
    K : ndarray
        Bare Hamiltonian K = ∑ θ_a F_a
    psi : float
        Log partition function ψ = log Tr[exp(K)]
    """
    # Compute K = ∑ θ_a F_a
    D = operators[0].shape[0]
    K = np.zeros((D, D), dtype=complex)
    for theta_a, F_a in zip(theta, operators):
        K += theta_a * F_a
    
    # Compute ψ = log Tr[exp(K)] with a stabilising shift.
    # exp(K) / Tr(exp(K)) is invariant under K -> K - cI, so shifting by the
    # largest eigenvalue reduces overflow/underflow for large ||theta||.
    lambda_max = float(np.max(np.linalg.eigvalsh(K).real))
    K_shift = K - lambda_max * np.eye(D, dtype=complex)
    exp_K_shift = expm(K_shift)
    Z_shift = np.trace(exp_K_shift)
    psi = float(lambda_max + np.log(Z_shift).real)
    
    # H = K - ψ I (normalized Hamiltonian)
    H = K - psi * np.eye(D, dtype=complex)
    
    return H, K, psi




[docs]
def test_duhamel_convergence():
    """Test convergence of Duhamel integration."""
    from qig.exponential_family import MatrixExponentialFamily
    
    print("=" * 70)
    print("TESTING DUHAMEL FORMULA CONVERGENCE")
    print("=" * 70)
    
    exp_family = MatrixExponentialFamily(n_sites=1, d=2)
    theta = np.array([0.3, 0.5, 0.2])
    
    rho = exp_family.rho_from_theta(theta)
    H, K, psi = compute_H_from_theta(exp_family.operators, theta)
    
    # Verify H is correct: exp(H) should equal ρ
    rho_check = expm(H)
    print(f"\nVerify exp(H) = ρ:")
    print(f"  Max error: {np.max(np.abs(rho_check - rho)):.6e}")
    print(f"  (Should be machine precision)")
    
    # Also verify: ρ = exp(K)/Z
    exp_K = expm(K)
    Z = np.trace(exp_K)
    rho_from_K = exp_K / Z
    print(f"\nVerify ρ = exp(K)/Z:")
    print(f"  Max error: {np.max(np.abs(rho_from_K - rho)):.6e}")
    
    # Test ∂ρ/∂θ_X
    a = 0
    F_a = exp_family.operators[a]
    mean_Fa = np.trace(rho @ F_a).real
    F_centered = F_a - mean_Fa * np.eye(2, dtype=complex)
    
    # Ground truth: finite differences
    eps = 1e-8
    theta_plus = theta.copy()
    theta_plus[a] += eps
    rho_plus = exp_family.rho_from_theta(theta_plus)
    
    theta_minus = theta.copy()
    theta_minus[a] -= eps
    rho_minus = exp_family.rho_from_theta(theta_minus)
    
    drho_fd = (rho_plus - rho_minus) / (2 * eps)
    
    print(f"\n∂ρ/∂θ_X convergence:")
    print(f"{'n_points':<10} {'Error':<15} {'Hermiticity':<15}")
    print("-" * 40)
    
    for n_points in [2, 5, 10, 20, 50, 100]:
        drho_duhamel = duhamel_derivative(rho, H, F_centered, n_points)
        
        err = np.max(np.abs(drho_duhamel - drho_fd))
        herm_err = np.max(np.abs(drho_duhamel - drho_duhamel.conj().T))
        
        print(f"{n_points:<10} {err:<15.6e} {herm_err:<15.6e}")
    
    # Also test SLD (which is n_points=2)
    drho_sld = exp_family.rho_derivative(theta, a)
    err_sld = np.max(np.abs(drho_sld - drho_fd))
    print(f"\nSLD formula error: {err_sld:.6e}")
    print(f"  (Should match n_points=2)")



if __name__ == "__main__":
    test_duhamel_convergence()