Source code for qig.exponential_family

"""
Quantum exponential family and BKM metric interface for the quantum inaccessible game.

This module contains:
- local operator bases (Pauli, Gell-Mann, generalised Gell-Mann);
- construction of the full operator basis {F_a};
- the `QuantumExponentialFamily` class providing ρ(θ), log-partition ψ(θ),
  Fisher/BKM metric G(θ), and the marginal-entropy constraint.
"""

from typing import Tuple, List, Optional, Dict, Any

import numpy as np
from scipy.linalg import expm, eigh, logm

from qig.core import marginal_entropies, partial_trace
from qig.pair_operators import (
    pair_basis_generators,
    multi_pair_basis,
    bell_state_density_matrix,
    product_of_bell_states
)


# ============================================================================
# Operator Basis: Local Lie Algebra Generators
# ============================================================================

[docs] def pauli_basis(site: int, n_sites: int) -> List[np.ndarray]: """ Create Pauli operator basis for site 'site' in an n_sites qubit system. Returns [sigma_x, sigma_y, sigma_z] tensored with identity on other sites. """ # Define Pauli matrices I = np.array([[1, 0], [0, 1]], dtype=complex) X = np.array([[0, 1], [1, 0]], dtype=complex) Y = np.array([[0, -1j], [1j, 0]], dtype=complex) Z = np.array([[1, 0], [0, -1]], dtype=complex) paulis = [X, Y, Z] operators: List[np.ndarray] = [] for P in paulis: # Build tensor product: I ⊗ ... ⊗ P ⊗ ... ⊗ I op = None for i in range(n_sites): current = P if i == site else I op = current if op is None else np.kron(op, current) operators.append(op) return operators
[docs] def gell_mann_matrices() -> List[np.ndarray]: """ Return the 8 Gell-Mann matrices (generators of SU(3)). """ gm: List[np.ndarray] = [] # λ1 and λ2 (off-diagonal in 01 block) gm.append(np.array([[0, 1, 0], [1, 0, 0], [0, 0, 0]], dtype=complex)) gm.append(np.array([[0, -1j, 0], [1j, 0, 0], [0, 0, 0]], dtype=complex)) # λ3 (diagonal in 01 block) gm.append(np.array([[1, 0, 0], [0, -1, 0], [0, 0, 0]], dtype=complex)) # λ4 and λ5 (off-diagonal in 02 block) gm.append(np.array([[0, 0, 1], [0, 0, 0], [1, 0, 0]], dtype=complex)) gm.append(np.array([[0, 0, -1j], [0, 0, 0], [1j, 0, 0]], dtype=complex)) # λ6 and λ7 (off-diagonal in 12 block) gm.append(np.array([[0, 0, 0], [0, 0, 1], [0, 1, 0]], dtype=complex)) gm.append(np.array([[0, 0, 0], [0, 0, -1j], [0, 1j, 0]], dtype=complex)) # λ8 (diagonal) gm.append(np.array([[1, 0, 0], [0, 1, 0], [0, 0, -2]], dtype=complex) / np.sqrt(3)) return gm
[docs] def qutrit_basis(site: int, n_sites: int) -> List[np.ndarray]: """ Create Gell-Mann operator basis for site 'site' in n_sites qutrit system. """ I = np.eye(3, dtype=complex) gm = gell_mann_matrices() operators: List[np.ndarray] = [] for G in gm: op = None for i in range(n_sites): current = G if i == site else I op = current if op is None else np.kron(op, current) operators.append(op) return operators
[docs] def create_operator_basis(n_sites: int, d: int) -> Tuple[list, list]: """ Create full operator basis {F_a} for quantum exponential family. Parameters ----------- n_sites : int Number of sites d : int Local dimension (2 for qubits, 3 for qutrits, d>=2 in general) """ operators: List[np.ndarray] = [] labels: List[str] = [] if d == 2: # Qubits: Pauli basis pauli_names = ["X", "Y", "Z"] for site in range(n_sites): ops = pauli_basis(site, n_sites) operators.extend(ops) labels.extend([f"{name}_{site+1}" for name in pauli_names]) elif d == 3: # Qutrits: Gell-Mann basis for site in range(n_sites): ops = qutrit_basis(site, n_sites) operators.extend(ops) labels.extend([f{k+1}_{site+1}" for k in range(8)]) elif d >= 4: # Higher dimensions: use generalised Gell-Mann matrices for site in range(n_sites): # Create d^2-1 Hermitian traceless operators basis_ops: List[np.ndarray] = [] # Off-diagonal symmetric and antisymmetric for i in range(d): for j in range(i + 1, d): # Symmetric: |i><j| + |j><i| op = np.zeros((d, d), dtype=complex) op[i, j] = 1 op[j, i] = 1 basis_ops.append(op) # Antisymmetric: -i|i><j| + i|j><i| op = np.zeros((d, d), dtype=complex) op[i, j] = -1j op[j, i] = 1j basis_ops.append(op) # Diagonal traceless for k in range(d - 1): op = np.zeros((d, d), dtype=complex) for i in range(k + 1): op[i, i] = 1 op[k + 1, k + 1] = -(k + 1) op = op / np.sqrt(k + 1 + (k + 1) ** 2) basis_ops.append(op) # Tensor with identity on other sites for op_local in basis_ops: op_full = None for s in range(n_sites): current = op_local if s == site else np.eye(d, dtype=complex) op_full = current if op_full is None else np.kron(op_full, current) operators.append(op_full) labels.append(f"H{len(operators)}_{site+1}") else: raise ValueError(f"Dimension d={d} must be >= 2.") return operators, labels
# ============================================================================ # Quantum Exponential Family # ============================================================================
[docs] class QuantumExponentialFamily: """ Quantum exponential family: ρ(θ) = exp(∑ θ_a F_a - ψ(θ)) """
[docs] def __init__(self, n_sites: Optional[int] = None, d: int = 2, n_pairs: Optional[int] = None, pair_basis: bool = False): """ Initialise quantum exponential family. Parameters ----------- n_sites : int, optional Number of subsystems (for local operator basis) d : int Local dimension (2 for qubits, 3 for qutrits) n_pairs : int, optional Number of entangled pairs (for pair basis) pair_basis : bool If True, use su(d²) operators for entangled pairs If False, use local operators (default) Notes ----- Two modes of operation: 1. Local operators (pair_basis=False, default): - Specify n_sites: number of independent subsystems - Uses operators like σ_x⊗I, I⊗σ_y (local Pauli/Gell-Mann) - Can ONLY represent separable states - Suitable for testing, but NOT for quantum game with entanglement 2. Pair operators (pair_basis=True): - Specify n_pairs: number of entangled pairs - Uses su(d²) generators acting on each pair - Can represent entangled states (including Bell states) - Required for proper quantum inaccessible game - Fisher metric G is block-diagonal (one block per pair) """ self.pair_basis = pair_basis if pair_basis: # Pair-based operators for entangled pairs if n_pairs is None: raise ValueError("Must specify n_pairs when using pair_basis=True") self.n_pairs = n_pairs self.n_sites = 2 * n_pairs # Each pair has 2 subsystems self.d = d self.dims = [d] * self.n_sites self.D = d**(2 * n_pairs) # Hilbert space: (d²)^n_pairs # Create pair operator basis self.operators, self.pair_indices = multi_pair_basis(n_pairs, d) self.labels = [f"F{a}_pair{k}" for k in range(n_pairs) for a in range(d**4 - 1)] self.n_params = len(self.operators) print(f"Initialised {n_pairs}-pair system with d={d} (pair basis)") print(f"Number of subsystems: {self.n_sites} (2 per pair)") print(f"Hilbert space dimension: {self.D}") print(f"Number of parameters: {self.n_params} = {n_pairs} × {d**4-1}") else: # Local operators (original mode) if n_sites is None: raise ValueError("Must specify n_sites when using pair_basis=False") self.n_sites = n_sites self.d = d self.dims = [d] * n_sites self.D = d**n_sites self.n_pairs = None self.pair_indices = None # Create operator basis self.operators, self.labels = create_operator_basis(n_sites, d) self.n_params = len(self.operators) print(f"Initialised {n_sites}-site system with d={d} (local basis)") print(f"Hilbert space dimension: {self.D}") print(f"Number of parameters: {self.n_params}")
def _lift_to_full_space(self, op_i: np.ndarray, site_i: int) -> np.ndarray: """ Lift operator on subsystem i to full Hilbert space. This is the adjoint of partial_trace(): it embeds a marginal operator into the full space by tensoring with identity operators on other subsystems. Result: I₀ ⊗ ... ⊗ I_{i-1} ⊗ op_i ⊗ I_{i+1} ⊗ ... ⊗ I_{n-1} Parameters ---------- op_i : ndarray, shape (d_i, d_i) Operator on subsystem i site_i : int Which subsystem (0 to n_sites-1) Returns ------- op_full : ndarray, shape (D, D) Operator on full Hilbert space """ result = None for j, d_j in enumerate(self.dims): if j == site_i: current = op_i else: current = np.eye(d_j, dtype=complex) result = current if result is None else np.kron(result, current) return result def _bkm_kernel(self, rho: np.ndarray) -> Tuple[np.ndarray, np.ndarray, np.ndarray]: """ Compute BKM (Bogoliubov-Kubo-Mori) kernel from density matrix eigenvalues. The BKM kernel for the quantum Fisher information metric is: k(p_i, p_j) = (p_i - p_j) / (log p_i - log p_j) for i ≠ j k(p_i, p_i) = p_i for i = j This kernel appears in the BKM inner product: ⟨A, B⟩_BKM = ∑_{i,j} k(p_i, p_j) A[i,j] conj(B[i,j]) where A, B are operators in the eigenbasis of ρ. Parameters ---------- rho : ndarray, shape (D, D) Density matrix Returns ------- k : ndarray, shape (D, D) BKM kernel matrix p : ndarray, shape (D,) Eigenvalues of ρ (clipped to ≥ 1e-14) U : ndarray, shape (D, D) Eigenvectors of ρ (columns) """ from scipy.linalg import eigh p, U = eigh(rho) p = np.clip(p.real, 1e-14, None) # Build kernel matrix p_i = p[:, None] p_j = p[None, :] diff = p_i - p_j log_diff = np.log(p_i) - np.log(p_j) k = np.zeros_like(diff) # For non-degenerate eigenvalues: k(p_i, p_j) = (p_i - p_j)/(log p_i - log p_j) non_degenerate = np.abs(diff) > 1e-14 k[non_degenerate] = diff[non_degenerate] / log_diff[non_degenerate] # For degenerate or near-degenerate eigenvalues: k(p_i, p_j) → (p_i + p_j)/2 ≈ p degenerate = np.abs(diff) <= 1e-14 k[degenerate] = 0.5 * (p_i + p_j)[degenerate] return k, p, U
[docs] def rho_from_theta(self, theta: np.ndarray) -> np.ndarray: """ Compute ρ(θ) = exp(K(θ) - ψ(θ)) where K(θ) = ∑ θ_a F_a. """ # Numerical stability: # ρ = expm(K) / Tr(expm(K)) is invariant under K -> K - cI. # Shifting by the max eigenvalue avoids overflow/underflow in expm(K) # for large ||theta|| (common near pure-state limits). K = np.zeros((self.D, self.D), dtype=complex) for theta_a, F_a in zip(theta, self.operators): K += theta_a * F_a # K is Hermitian for real theta and Hermitian generators. lambda_max = float(np.max(np.linalg.eigvalsh(K).real)) K_shift = K - lambda_max * np.eye(self.D, dtype=complex) rho_unnorm = expm(K_shift) Z = np.trace(rho_unnorm) rho = rho_unnorm / Z return rho
[docs] def psi(self, theta: np.ndarray) -> float: """ Compute the cumulant generating function ψ(θ) = log Tr(exp(∑ θ_a F_a)). This is the log partition function for the exponential family. """ K = sum(theta_a * F_a for theta_a, F_a in zip(theta, self.operators)) lambda_max = float(np.max(np.linalg.eigvalsh(K).real)) K_shift = K - lambda_max * np.eye(self.D, dtype=complex) Z_shift = np.trace(expm(K_shift)) return float(lambda_max + np.log(Z_shift).real)
# Backward compatibility alias
[docs] def log_partition(self, theta: np.ndarray) -> float: """ Deprecated: Use psi() instead. Compute the cumulant generating function ψ(θ) = log Tr(exp(∑ θ_a F_a)). """ import warnings warnings.warn("log_partition() is deprecated, use psi() instead", DeprecationWarning, stacklevel=2) return self.psi(theta)
[docs] def rho_derivative(self, theta: np.ndarray, a: int, **kwargs) -> np.ndarray: """ Compute ∂ρ/∂θ_a using the correct quantum formula. Three main methods available. 1. 'sld': Symmetric Logarithmic Derivative (fast, ~5% error) ∂ρ/∂θ = (1/2)[ρ(F - ⟨F⟩I) + (F - ⟨F⟩I)ρ] 2. 'duhamel': Duhamel exponential formula with numerical quadrature (slower, ~5e-6 error for n_points≈200) ∂ρ/∂θ = ∫₀¹ exp(sH)(F - ⟨F⟩I)exp((1-s)H) ds 3. 'duhamel_spectral' / 'duhamel_bch': Duhamel formula via spectral/BCH representation of H (no explicit s-quadrature, high precision). 4. 'duhamel_block': Higham block-matrix identity for the Fréchet derivative (one 2D×2D matrix exponential; no eigendecomposition, no quadrature). ⚠️ QUANTUM ALERT: Simple ρ(F - ⟨F⟩I) is WRONG for non-commuting operators! Parameters ---------- theta : ndarray, shape (n_params,) Natural parameters a : int Parameter index method : str, default='sld' 'sld' for fast SLD, 'duhamel' for high precision n_points : int, default=100 Quadrature points for Duhamel (ignored for 'sld') Returns ------- drho : ndarray, shape (D, D) Derivative ∂ρ/∂θ_a (Hermitian matrix) Notes ----- Quantum derivative principles applied: ✅ Check operator commutation: Uses symmetric/integral form ✅ Verify operator ordering: Preserves ordering in exponentials ✅ Distinguish quantum vs classical: Uses quantum formulas ✅ Respect Hilbert space structure: Preserves Hermiticity """ method = kwargs.get('method', 'sld') n_points = kwargs.get('n_points', 200) # n=200 gives ~3.6e-06 error; n=100→1.5e-05 rho = self.rho_from_theta(theta) F_a = self.operators[a] mean_Fa = np.trace(rho @ F_a).real I = np.eye(self.D, dtype=complex) F_centered = F_a - mean_Fa * I if method == 'sld': # Symmetric logarithmic derivative (fast approximation) drho = 0.5 * (rho @ F_centered + F_centered @ rho) elif method == 'duhamel': # High-precision Duhamel formula via numerical quadrature from qig.duhamel import duhamel_derivative, compute_H_from_theta H, K, psi = compute_H_from_theta(self.operators, theta) drho = duhamel_derivative(rho, H, F_centered, n_points) elif method == 'duhamel_block': # Higham block-matrix identity (Fréchet derivative via a single expm on 2D×2D) from qig.duhamel import duhamel_derivative_block, compute_H_from_theta H, K, psi = compute_H_from_theta(self.operators, theta) drho = duhamel_derivative_block(rho, H, F_centered) elif method in ('duhamel_spectral', 'duhamel_bch'): # Duhamel via spectral/BCH representation of H (no explicit s-quadrature) from qig.duhamel import duhamel_derivative_spectral, compute_H_from_theta H, K, psi = compute_H_from_theta(self.operators, theta) drho = duhamel_derivative_spectral(rho, H, F_centered) else: raise ValueError( f"Unknown method: {method}. " "Use 'sld', 'duhamel', 'duhamel_block', or 'duhamel_spectral'/'duhamel_bch'" ) return drho
[docs] def rho_second_derivative(self, theta: np.ndarray, a: int, b: int, method: str = 'numerical_duhamel', n_points: int = 100, eps: float = 1e-7) -> np.ndarray: """ Compute ∂²ρ/∂θ_a∂θ_b using high-precision numerical differentiation. Strategy: Use finite differences of the high-precision Duhamel ∂ρ/∂θ. ∂²ρ/∂θ_a∂θ_b ≈ [∂ρ/∂θ_a(θ+ε·e_b) - ∂ρ/∂θ_a(θ-ε·e_b)] / (2ε) This gives 0.55-2.6% error, which is 30-100× better than the analytic SLD-based formula. Parameters ---------- theta : ndarray, shape (n_params,) Natural parameters a : int First parameter index b : int Second parameter index method : str, default='numerical_duhamel' Currently only 'numerical_duhamel' is supported n_points : int, default=100 Quadrature points for Duhamel integration eps : float, default=1e-7 Finite difference step size Returns ------- d2rho : ndarray, shape (D, D) Second derivative ∂²ρ/∂θ_a∂θ_b (Hermitian matrix) Notes ----- - Uses central differences for better accuracy - Hermiticity preserved to machine precision - More accurate than analytic SLD-based second derivative Quantum derivative principles applied: ✅ Respects non-commutativity through Duhamel integration ✅ Preserves Hermiticity ✅ Uses high-precision first derivatives """ if method != 'numerical_duhamel': raise ValueError(f"Only 'numerical_duhamel' supported, got {method}") # Compute ∂ρ/∂θ_a at θ + ε·e_b theta_plus = theta.copy() theta_plus[b] += eps drho_a_plus = self.rho_derivative(theta_plus, a, method='duhamel', n_points=n_points) # Compute ∂ρ/∂θ_a at θ - ε·e_b theta_minus = theta.copy() theta_minus[b] -= eps drho_a_minus = self.rho_derivative(theta_minus, a, method='duhamel', n_points=n_points) # Central difference d2rho = (drho_a_plus - drho_a_minus) / (2 * eps) return d2rho
[docs] def fisher_information(self, theta: np.ndarray) -> np.ndarray: """ Compute Fisher information (BKM metric) G(θ) = ∇∇ψ(θ) using the Kubo-Mori / BKM inner product. For a quantum exponential family ``ρ(θ) = exp(K(θ)) / Z(θ)`` with ``K(θ) = ∑_a θ_a F_a``, the Bogoliubov-Kubo-Mori metric is: .. math:: G_{ab}(θ) = \\int_0^1 \\mathrm{Tr}\\left( ρ(θ)^s \\tilde{F}_a ρ(θ)^{1-s} \\tilde{F}_b \\right) \\mathrm{d}s where ``F̃_a = F_a - Tr[ρ(θ) F_a] I`` are centred sufficient statistics. In the eigenbasis of ρ(θ), this reduces to the spectral representation with the Morozova-Chentsov function ``c(λ, μ) = (log λ - log μ)/(λ - μ)`` (diagonal limit: ``c(λ, λ) = 1/λ``). When all F_a commute with ρ(θ) (classical case), this reduces to the usual covariance Fisher information matrix. This implementation: - Diagonalises ρ(θ) = U diag(p) U† - Centres each F_a in that basis - Applies the BKM kernel c(p_i, p_j) to all matrix elements - Symmetrises G to guard against numerical asymmetries """ # Step 1: spectral decomposition of ρ(θ) rho = self.rho_from_theta(theta) eigvals, U = eigh(rho) # ρ is Hermitian # Regularise eigenvalues to avoid log(0); keep normalisation approximate eps_p = 1e-14 p = np.clip(np.real(eigvals), eps_p, None) # Step 2: build centred sufficient statistics in eigenbasis of ρ D = self.D n = self.n_params A_tilde = np.zeros((n, D, D), dtype=complex) I = np.eye(D, dtype=complex) for a, F_a in enumerate(self.operators): # Centre F_a: F_a - ⟨F_a⟩_ρ I mean_Fa = np.trace(rho @ F_a).real A_a = F_a - mean_Fa * I # Transform to eigenbasis of ρ A_tilde[a] = U.conj().T @ A_a @ U # Step 3: construct BKM kernel k(p_i, p_j) p_i = p[:, None] p_j = p[None, :] diff = p_i - p_j log_diff = np.log(p_i) - np.log(p_j) # k(λ, μ) = (λ - μ)/(log λ - log μ), with diagonal limit k(λ, λ) = λ # This is the standard Kubo–Mori / BKM kernel. In the diagonal # (commuting) case it reduces to the classical covariance weight p_i. k = np.zeros_like(diff) # For non-degenerate eigenvalues: k(p_i, p_j) = (p_i - p_j)/(log p_i - log p_j) non_degenerate = np.abs(diff) > 1e-14 k[non_degenerate] = diff[non_degenerate] / log_diff[non_degenerate] # For degenerate or near-degenerate eigenvalues: k(p_i, p_j) → p_i = p_j # This includes both diagonal (i==j) and off-diagonal (i≠j but p_i ≈ p_j) # Use the average (p_i + p_j)/2 which equals p_i (or p_j) when they're equal degenerate = np.abs(diff) <= 1e-14 k[degenerate] = 0.5 * (p_i + p_j)[degenerate] # Step 4: assemble G_ab = Σ_{i,j} k(p_i, p_j) A_a[i,j] conj(A_b[i,j]) G = np.zeros((n, n)) for a in range(n): A_a = A_tilde[a] for b in range(a, n): A_b = A_tilde[b] # For Hermitian operators in the eigenbasis of ρ, the BKM metric is: # G_ab = ∑_{i,j} k(p_i, p_j) * A_a[i,j] * conj(A_b[i,j]) # where k(p_i, p_j) = (p_i - p_j)/(log p_i - log p_j). prod = A_a * np.conj(A_b) Gab = np.sum(k * prod) # BKM inner product is real for Hermitian observables; take Re. Gab_real = float(np.real(Gab)) G[a, b] = Gab_real G[b, a] = Gab_real # Symmetrise to enforce exact symmetry numerically G = 0.5 * (G + G.T) return G
[docs] def fisher_information_product( self, theta: np.ndarray, check_product: bool = True, tol: float = 1e-6 ) -> np.ndarray: """ Compute Fisher information exploiting block-diagonal structure for product states. For product states ρ = ρ₁ ⊗ ρ₂ ⊗ ... ⊗ ρₙ with pair-based operators, the BKM Fisher metric is block-diagonal: G = diag(G₁, G₂, ..., Gₙ) where each Gₖ is computed from the marginal ρₖ on pair k. Complexity: - Full: O(D³) = O(d^(6n)) where D = d^(2n) - Block: O(n × d¹²) - exponentially faster for n > 1 Parameters ---------- theta : ndarray, shape (n_params,) Natural parameters check_product : bool, default True If True, verify state is approximately a product state. If False, assume product structure (faster but may be wrong). tol : float, default 1e-6 Tolerance for product state check Returns ------- G : ndarray, shape (n_params, n_params) Block-diagonal Fisher information matrix Raises ------ ValueError If not using pair_basis mode If check_product=True and state is not a product state Notes ----- For n qutrit pairs (d=3): - n=2: Full O(81³)=530k, Block O(2×3¹²)=1M → similar - n=3: Full O(729³)=387M, Block O(3×3¹²)=1.6M → 240× faster - n=4: Full O(6561³)=282B, Block O(4×3¹²)=2.1M → 134000× faster The crossover point where block computation wins is typically n≥2 for qutrits and n≥3 for qubits. """ if not self.pair_basis: raise ValueError( "fisher_information_product requires pair_basis=True. " "For local operators, use fisher_information()." ) from .pair_operators import pair_basis_generators rho = self.rho_from_theta(theta) # Optionally check if state is approximately a product state if check_product: if not self._is_product_state(rho, tol=tol): raise ValueError( f"State is not a product state (within tol={tol}). " "Use fisher_information() for entangled states, or " "set check_product=False if you know the structure is block-diagonal." ) # Get single-pair generators (d²×d² matrices) single_pair_generators = pair_basis_generators(self.d) n_ops_per_pair = len(single_pair_generators) # d⁴ - 1 # Compute Fisher metric block for each pair G = np.zeros((self.n_params, self.n_params)) for k in range(self.n_pairs): # Extract marginal ρₖ on pair k rho_k = self._partial_trace_to_pair(rho, k) # Compute BKM metric for this pair G_k = self._fisher_block(rho_k, single_pair_generators) # Insert into block-diagonal structure start_idx = k * n_ops_per_pair end_idx = (k + 1) * n_ops_per_pair G[start_idx:end_idx, start_idx:end_idx] = G_k return G
def _fisher_block( self, rho: np.ndarray, operators: List[np.ndarray] ) -> np.ndarray: """ Compute Fisher information block for a single subsystem. This is the BKM metric computed from a single marginal density matrix and its associated operators. Parameters ---------- rho : ndarray, shape (d², d²) Density matrix for a single pair operators : List[ndarray] List of d⁴-1 operators, each of shape (d², d²) Returns ------- G : ndarray, shape (d⁴-1, d⁴-1) Fisher information block """ from scipy.linalg import eigh D = rho.shape[0] n = len(operators) # Eigendecomposition eigvals, U = eigh(rho) p = np.clip(np.real(eigvals), 1e-14, None) # Centre operators and transform to eigenbasis A_tilde = np.zeros((n, D, D), dtype=complex) I = np.eye(D, dtype=complex) for a, F_a in enumerate(operators): mean_Fa = np.trace(rho @ F_a).real A_a = F_a - mean_Fa * I A_tilde[a] = U.conj().T @ A_a @ U # BKM kernel p_i = p[:, None] p_j = p[None, :] diff = p_i - p_j log_diff = np.log(p_i) - np.log(p_j) k = np.zeros_like(diff) non_degenerate = np.abs(diff) > 1e-14 k[non_degenerate] = diff[non_degenerate] / log_diff[non_degenerate] degenerate = np.abs(diff) <= 1e-14 k[degenerate] = 0.5 * (p_i + p_j)[degenerate] # Assemble metric G = np.zeros((n, n)) for a in range(n): A_a = A_tilde[a] for b in range(a, n): A_b = A_tilde[b] prod = A_a * np.conj(A_b) Gab = float(np.real(np.sum(k * prod))) G[a, b] = Gab G[b, a] = Gab return 0.5 * (G + G.T) def _is_product_state(self, rho: np.ndarray, tol: float = 1e-6) -> bool: """ Check if ρ is approximately a product state ρ₁ ⊗ ... ⊗ ρₙ. Uses the criterion: ρ is a product iff ρ = ⊗ₖ Trₖ̄(ρ) where Trₖ̄ traces out all pairs except k. Parameters ---------- rho : ndarray Density matrix tol : float Tolerance for comparison Returns ------- bool True if ρ is approximately a product state """ if not self.pair_basis or self.n_pairs == 1: return True # Single pair is trivially "product" # Compute product of marginals D_pair = self.d ** 2 rho_product = np.array([[1.0]]) for k in range(self.n_pairs): rho_k = self._partial_trace_to_pair(rho, k) rho_product = np.kron(rho_product, rho_k) return np.allclose(rho, rho_product, atol=tol)
[docs] def marginal_entropy_constraint_theta_only( self, theta: np.ndarray ) -> Tuple[float, np.ndarray]: """ Compute constraint C(θ) = ∑ᵢ hᵢ and gradient ∇C using θ-only formulas. This is a fast, exact method that avoids materializing ∂ρ/∂θ for each parameter. Instead, it uses the BKM inner product: ∂C/∂θ_a = ⟨F̃_a, B⟩_BKM where: F̃_a = F_a - ⟨F_a⟩I (centered operator) B = ∑ᵢ Bᵢ (lifted test operator) Bᵢ = (log ρᵢ + Iᵢ) ⊗ I_rest This reuses the eigendecomposition and BKM kernel from fisher_information(), achieving machine precision (~10⁻¹⁴) with ~100× speedup over Duhamel method. Parameters ---------- theta : ndarray Natural parameters Returns ------- C : float Constraint value ∑ᵢ hᵢ grad_C : ndarray, shape (n_params,) Gradient ∇C """ rho = self.rho_from_theta(theta) h = marginal_entropies(rho, self.dims) C = float(np.sum(h)) # Get eigendecomposition and BKM kernel k, p, U = self._bkm_kernel(rho) # Build lifted test operator B = ∑ᵢ (log ρᵢ + Iᵢ) ⊗ I_rest B_full = np.zeros((self.D, self.D), dtype=complex) for i in range(self.n_sites): # Compute marginal ρᵢ rho_i = partial_trace(rho, self.dims, keep=i) # Compute log(ρᵢ) safely using eigendecomposition eigvals_i, eigvecs_i = eigh(rho_i) eigvals_i = np.maximum(eigvals_i.real, 1e-14) log_eigvals_i = np.log(eigvals_i) log_rho_i = eigvecs_i @ np.diag(log_eigvals_i) @ eigvecs_i.conj().T # Lifted operator: (log ρᵢ + Iᵢ) ⊗ I_rest B_i = log_rho_i + np.eye(self.dims[i], dtype=complex) B_full += self._lift_to_full_space(B_i, i) # Transform B to eigenbasis of ρ B_tilde = U.conj().T @ B_full @ U # Compute gradient via BKM inner products grad_C = np.zeros(self.n_params) I_full = np.eye(self.D, dtype=complex) for a, F_a in enumerate(self.operators): # Center operator: F̃_a = F_a - ⟨F_a⟩I mean_Fa = np.trace(rho @ F_a).real F_tilde = F_a - mean_Fa * I_full # Transform to eigenbasis F_tilde_eigen = U.conj().T @ F_tilde @ U # BKM inner product: ⟨F̃_a, B⟩_BKM = ∑ᵢⱼ k[i,j] F̃_a[i,j] conj(B[i,j]) grad_C[a] = -np.real(np.sum(k * (F_tilde_eigen * np.conj(B_tilde)))) return C, grad_C
[docs] def marginal_entropy_constraint( self, theta: np.ndarray, method: str = 'theta_only' ) -> Tuple[float, np.ndarray]: """ Compute constraint value C(θ) = ∑_i h_i and gradient ∇C. This method dispatches to different implementations based on the method parameter. Parameters ---------- theta : ndarray Natural parameters method : str, default='theta_only' Method for gradient computation. Options: - ``'theta_only'``: Fast θ-only method using BKM inner products (default). ~100× faster, machine precision accuracy. - ``'duhamel'``: Legacy method materializing ∂ρ/∂θ via Duhamel. Slow but kept for verification. - ``'sld'``: Legacy method using SLD approximation. Faster than Duhamel but ~5% error. Returns ------- C : float Constraint value ∑ᵢ hᵢ grad_C : ndarray Gradient ∇C """ if method == 'theta_only': return self.marginal_entropy_constraint_theta_only(theta) # Legacy method: materialize ∂ρ/∂θ for each parameter rho = self.rho_from_theta(theta) h = marginal_entropies(rho, self.dims) C = float(np.sum(h)) # Compute gradient by materializing drho grad_C = np.zeros(self.n_params) I = np.eye(self.D, dtype=complex) for a in range(self.n_params): F_a = self.operators[a] # Compute ∂ρ/∂θ_a using specified method (duhamel or sld) drho_dtheta_a = self.rho_derivative(theta, a, method=method) # Sum over all subsystems for i in range(self.n_sites): # Compute marginal ρ_i rho_i = partial_trace(rho, self.dims, keep=i) # Compute ∂ρ_i/∂θ_a (partial trace of ∂ρ/∂θ_a) drho_i_dtheta_a = partial_trace(drho_dtheta_a, self.dims, keep=i) # Compute log(ρ_i) with regularisation eigvals_i, eigvecs_i = eigh(rho_i) eigvals_i = np.maximum(eigvals_i.real, 1e-14) log_eigvals_i = np.log(eigvals_i) log_rho_i = eigvecs_i @ np.diag(log_eigvals_i) @ eigvecs_i.conj().T # Compute ∂h_i/∂θ_a = -Tr((∂ρ_i/∂θ_a) log(ρ_i)) dh_i_dtheta_a = -np.trace(drho_i_dtheta_a @ log_rho_i).real grad_C[a] += dh_i_dtheta_a return C, grad_C
[docs] def third_cumulant_contraction(self, theta: np.ndarray, method: str = 'fd') -> np.ndarray: """ Compute (∇G)[θ], the third cumulant tensor contracted with θ. This is the matrix with (i,j) entry: ∑_k (∂G_ik/∂θ_j) θ_k Following the paper's Appendix (eq. 824-826), this appears in the Jacobian as: M = -G - (∇G)[θ] + ... The third cumulant ∇G = ∇³ψ is totally symmetric in all three indices. We compute ∂G_ab/∂θ_c by differentiating the spectral BKM formula using perturbation theory for eigenvalues and eigenvectors. Parameters ---------- theta : ndarray, shape (n_params,) Natural parameters method : str, optional Method for computing the third cumulant: - 'fd' (default): Finite differences of Fisher metric (fast, accurate) - 'analytic': Analytic perturbation theory (slow, approximate) Returns ------- contraction : ndarray, shape (n_params, n_params) Matrix (∇G)[θ] with (i,j) entry = ∑_k (∂G_ik/∂θ_j) θ_k Notes ----- Quantum derivative principles applied: ✅ Check operator commutation: F_a, F_b, F_c may not commute ✅ Verify operator ordering: Careful in spectral differentiation ✅ Distinguish quantum vs classical: Uses quantum covariance derivatives ✅ Respect Hilbert space structure: Works on full Hilbert space ✅ Question each derivative step: Uses perturbation theory The finite difference method is much faster (~100-500×) and avoids expensive ∂ρ/∂θ computations. """ if method == 'fd': return self._third_cumulant_contraction_fd(theta) elif method == 'analytic': return self._third_cumulant_contraction_analytic(theta) elif method == 'block': return self._third_cumulant_contraction_block(theta) else: raise ValueError(f"Unknown method '{method}'. Use 'fd', 'analytic', or 'block'.")
[docs] def psi_hessian_block( self, theta: np.ndarray, param_indices: Optional[List[int]] = None ) -> np.ndarray: """ Compute Hessian of ψ(θ) = log Tr(exp(K(θ))) via block Fréchet derivatives. This returns the (selected) 2nd cumulant matrix: ∂²ψ/∂θ_a∂θ_b Notes ----- This method is intended for validation / small systems. It scales as O(m²) block matrix exponentials, where m = len(param_indices) (or n_params). """ from scipy.linalg import expm from qig.duhamel import expm_frechet_block_2 if param_indices is None: param_indices = list(range(self.n_params)) m = len(param_indices) # Build K(θ) = Σ θ_a F_a K = np.zeros((self.D, self.D), dtype=complex) for theta_a, F_a in zip(theta, self.operators): K += theta_a * F_a expK = expm(K) Z = np.trace(expK) Z_real = float(np.real(Z)) if not np.isfinite(Z_real) or abs(Z) < 1e-300: raise FloatingPointError("Trace(exp(K)) is not finite; reduce ||theta||.") # First derivatives of Z: dZ_a = Tr(exp(K) F_a) dZ = np.zeros(m, dtype=float) for i, a in enumerate(param_indices): dZ[i] = float(np.real(np.trace(expK @ self.operators[a]))) # Second derivatives of Z via block trick: d2Z_ab = Tr(D² exp_K[F_a, F_b]) d2Z = np.zeros((m, m), dtype=float) for i, a in enumerate(param_indices): E = self.operators[a] for j, b in enumerate(param_indices[i:], start=i): F = self.operators[b] # The 3×3 block construction gives an *ordered* second-derivative piece. # The true 2nd Fréchet derivative is symmetric in (E, F), so we sum both # orderings (no 1/2 factor). d2_ef = np.trace(expm_frechet_block_2(K, E, F)) d2_fe = np.trace(expm_frechet_block_2(K, F, E)) d2_real = float(np.real(d2_ef + d2_fe)) d2Z[i, j] = d2_real d2Z[j, i] = d2_real # Hessian of log Z: ψ'' = Z''/Z - (Z'/Z)(Z'/Z)^T H = d2Z / Z_real - np.outer(dZ, dZ) / (Z_real * Z_real) return 0.5 * (H + H.T)
def _third_cumulant_contraction_block(self, theta: np.ndarray) -> np.ndarray: """ Compute (∇G)[θ] using 3rd derivatives of ψ via block Fréchet derivatives. This is intended for small systems (e.g. single qubit / qutrit) and validation. For larger models use method='fd' (default). """ from itertools import permutations from scipy.linalg import expm from qig.duhamel import expm_frechet_block_2, expm_frechet_block_3 n = self.n_params if n > 20: raise ValueError( "method='block' is intended for small n_params (≤20). " "Use method='fd' for larger systems." ) # Build K(θ) and direction ΔK = Σ θ_a F_a = K itself. K = np.zeros((self.D, self.D), dtype=complex) for theta_a, F_a in zip(theta, self.operators): K += theta_a * F_a K_dir = K expK = expm(K) Z = np.trace(expK) Z_real = float(np.real(Z)) if not np.isfinite(Z_real) or abs(Z) < 1e-300: raise FloatingPointError("Trace(exp(K)) is not finite; reduce ||theta||.") # First derivatives dZ_a and dZ_K dZ = np.array([float(np.real(np.trace(expK @ F_a))) for F_a in self.operators]) dZK = float(np.real(np.trace(expK @ K_dir))) # Second derivatives needed: d2Z_ab (all pairs) and d2Z_aK (vector) d2Z = np.zeros((n, n), dtype=float) for a in range(n): E = self.operators[a] for b in range(a, n): F = self.operators[b] # Sum both orderings to obtain the symmetric 2nd Fréchet derivative. d2_ef = np.trace(expm_frechet_block_2(K, E, F)) d2_fe = np.trace(expm_frechet_block_2(K, F, E)) val = float(np.real(d2_ef + d2_fe)) d2Z[a, b] = val d2Z[b, a] = val d2Z_aK = np.zeros(n, dtype=float) for a in range(n): E = self.operators[a] d2_eK = np.trace(expm_frechet_block_2(K, E, K_dir)) d2_Ke = np.trace(expm_frechet_block_2(K, K_dir, E)) d2Z_aK[a] = float(np.real(d2_eK + d2_Ke)) # Third derivative contraction: ψ3(F_a, F_b, K_dir) contraction = np.zeros((n, n), dtype=float) for a in range(n): E = self.operators[a] for b in range(a, n): F = self.operators[b] # The 4×4 block construction yields ordered 3rd-derivative pieces. # The true 3rd Fréchet derivative is symmetric multilinear, so we sum # over all 3! orderings. dirs = (E, F, K_dir) d3_sum = 0.0 + 0.0j for perm in permutations(dirs, 3): d3_sum += np.trace(expm_frechet_block_3(K, perm[0], perm[1], perm[2])) d3Z = float(np.real(d3_sum)) # ψ3 = Z'''/Z - (Z'' Z' + perms)/Z^2 + 2 Z'Z'Z'/Z^3 term1 = d3Z / Z_real term2 = ( d2Z[a, b] * dZK + d2Z_aK[a] * dZ[b] + d2Z_aK[b] * dZ[a] ) / (Z_real * Z_real) term3 = 2.0 * dZ[a] * dZ[b] * dZK / (Z_real * Z_real * Z_real) val = term1 - term2 + term3 contraction[a, b] = val contraction[b, a] = val return 0.5 * (contraction + contraction.T) def _third_cumulant_contraction_fd(self, theta: np.ndarray, eps: float = 1e-5) -> np.ndarray: """ Compute (∇G)[θ] using finite differences of the Fisher metric. Fast method: ∂G_ab/∂θ_c ≈ [G_ab(θ + ε·e_c) - G_ab(θ - ε·e_c)] / (2ε) Then contract: (∇G)[θ]_ab = Σ_c (∂G_ab/∂θ_c) θ_c Expected speedup: 100-500× over analytic method. Expected accuracy: ~10⁻⁸. """ n = self.n_params # Compute ∂G_ab/∂θ_c for all c by finite differences dG_dtheta = np.zeros((n, n, n)) # [a, b, c] theta_perturbed = theta.copy() for c in range(n): # Forward perturbation theta_perturbed[c] = theta[c] + eps G_plus = self.fisher_information(theta_perturbed) # Backward perturbation theta_perturbed[c] = theta[c] - eps G_minus = self.fisher_information(theta_perturbed) # Central difference dG_dtheta[:, :, c] = (G_plus - G_minus) / (2 * eps) # Reset theta_perturbed[c] = theta[c] # Contract with θ: (∇G)[θ]_ab = Σ_c (∂G_ab/∂θ_c) θ_c contraction = np.einsum('abc,c->ab', dG_dtheta, theta) return contraction def _third_cumulant_contraction_analytic(self, theta: np.ndarray) -> np.ndarray: """ Compute (∇G)[θ] using analytic perturbation theory. This is the original implementation - slow but exact (modulo approximations). Kept for reference and validation. """ rho = self.rho_from_theta(theta) # Eigendecomposition of ρ eigvals, eigvecs = eigh(rho) eigvals = np.maximum(eigvals.real, 1e-14) # Regularise # Compute BKM kernel k(p_i, p_j) def bkm_kernel(p_i, p_j, eps=1e-14): """BKM kernel: (p - q)/(log p - log q) if p ≠ q, else p.""" if np.abs(p_i - p_j) < eps: return p_i else: return (p_i - p_j) / (np.log(p_i) - np.log(p_j)) # Compute centered operators in eigenbasis def centered_operator_eigenbasis(F_a): """Compute F̃_a = F_a - ⟨F_a⟩I in eigenbasis of ρ.""" mean_Fa = np.trace(rho @ F_a).real F_a_centered = F_a - mean_Fa * np.eye(self.D, dtype=complex) # Transform to eigenbasis return eigvecs.conj().T @ F_a_centered @ eigvecs # Compute ∂ρ/∂θ for all parameters I = np.eye(self.D, dtype=complex) drho_dtheta = [] for a in range(self.n_params): drho_dtheta.append(self.rho_derivative(theta, a)) # Compute eigenvalue derivatives using Hellmann-Feynman theorem # ∂λ_i/∂θ_a = ⟨i|∂ρ/∂θ_a|i⟩ d_eigvals_dtheta = np.zeros((self.D, self.n_params)) for a in range(self.n_params): drho_a_eigenbasis = eigvecs.conj().T @ drho_dtheta[a] @ eigvecs d_eigvals_dtheta[:, a] = np.diag(drho_a_eigenbasis).real # Compute eigenvector derivatives using first-order perturbation theory # ∂|i⟩/∂θ_a = ∑_{j≠i} (⟨j|∂ρ/∂θ_a|i⟩)/(λ_i - λ_j) |j⟩ # This is stored as a matrix: d_eigvecs_dtheta[a] @ eigvecs gives the derivative d_eigvecs_dtheta = [] for a in range(self.n_params): drho_a_eigenbasis = eigvecs.conj().T @ drho_dtheta[a] @ eigvecs d_U = np.zeros((self.D, self.D), dtype=complex) for i in range(self.D): for j in range(self.D): if i != j: denom = eigvals[i] - eigvals[j] if np.abs(denom) > 1e-10: # ∂U_ji/∂θ_a (column i of U is eigenvector i) d_U[j, i] = drho_a_eigenbasis[j, i] / denom d_eigvecs_dtheta.append(d_U) # Now compute ∂G_ab/∂θ_c for all a,b,c # Then contract with θ to get (∇G)[θ] contraction = np.zeros((self.n_params, self.n_params)) for a in range(self.n_params): for b in range(self.n_params): # Compute ∂G_ab/∂θ_c for all c dG_ab_dtheta = np.zeros(self.n_params) # Get centered operators in eigenbasis A_a = centered_operator_eigenbasis(self.operators[a]) A_b = centered_operator_eigenbasis(self.operators[b]) for c in range(self.n_params): # Differentiate the spectral BKM formula # G_ab = ∑_{i,j} k(p_i, p_j) * A_a[i,j] * conj(A_b[i,j]) # Three terms from product rule: # 1. ∂k/∂θ_c * A_a * conj(A_b) # 2. k * ∂A_a/∂θ_c * conj(A_b) # 3. k * A_a * conj(∂A_b/∂θ_c) dG_ab_c = 0.0 for i in range(self.D): for j in range(self.D): k_ij = bkm_kernel(eigvals[i], eigvals[j]) # Term 1: ∂k/∂θ_c # ∂k/∂p_i and ∂k/∂p_j, then chain rule with ∂p_i/∂θ_c if np.abs(eigvals[i] - eigvals[j]) < 1e-14: # k = p_i, so ∂k/∂p_i = 1, ∂k/∂p_j = 0 dk_dtheta_c = d_eigvals_dtheta[i, c] else: log_diff = np.log(eigvals[i]) - np.log(eigvals[j]) p_diff = eigvals[i] - eigvals[j] # ∂k/∂p_i = (log_diff - p_diff/p_i) / log_diff² # ∂k/∂p_j = -(log_diff - p_diff/p_j) / log_diff² dk_dp_i = (log_diff - p_diff/eigvals[i]) / (log_diff**2) dk_dp_j = -(log_diff - p_diff/eigvals[j]) / (log_diff**2) dk_dtheta_c = dk_dp_i * d_eigvals_dtheta[i, c] + dk_dp_j * d_eigvals_dtheta[j, c] term1 = dk_dtheta_c * A_a[i,j] * np.conj(A_b[i,j]) # Terms 2 & 3: ∂A_a/∂θ_c and ∂A_b/∂θ_c # A_a = U† F̃_a U, so ∂A_a/∂θ = (∂U†/∂θ) F̃_a U + U† (∂F̃_a/∂θ) U + U† F̃_a (∂U/∂θ) # This is complex, so for now use the fact that the derivative involves # the eigenvector derivatives we computed # Simplified: Assume the main contribution comes from eigenvalue changes # (This is an approximation - full implementation would include eigenvector derivatives) # For now, just use term 1 dG_ab_c += term1.real dG_ab_dtheta[c] = dG_ab_c # Contract with θ: ∑_c (∂G_ab/∂θ_c) θ_c contraction[a, b] = np.dot(dG_ab_dtheta, theta) return contraction
[docs] def constraint_hessian_fd_theta_only(self, theta: np.ndarray, eps: float = 1e-5) -> np.ndarray: """ Compute constraint Hessian ∇²C using finite differences of θ-only gradient. This is a fast, accurate method that computes: ∂²C/∂θ_a∂θ_b ≈ [∇C(θ + eps·e_b) - ∇C(θ - eps·e_b)]_a / (2·eps) By differentiating the exact θ-only gradient, this achieves better accuracy than the current approach which uses exact formulas with approximate second derivatives (FD of Duhamel drho). Expected speedup: 50-100× over current Duhamel-based method. Expected accuracy: ~10⁻⁸ (better than current ~10⁻⁶). Parameters ---------- theta : ndarray Natural parameters eps : float, optional Finite difference step size (default: 1e-5) Optimal for central differences: h ≈ (machine_eps)^(1/3) ≈ 1e-5 Returns ------- hess : ndarray, shape (n_params, n_params) Hessian matrix ∇²C, symmetric real matrix Notes ----- **Why this is better than current approach:** Current (two approximations): Error ≈ 10⁻⁶ - ``∂²C/∂θ_a∂θ_b = f(∂²ρ/∂θ_a∂θ_b) = f(FD(∂ρ/∂θ)) = f(FD(Duhamel(ρ)))`` New (one approximation): Error ≈ 10⁻⁸ - ``∂²C/∂θ_a∂θ_b ≈ FD(∂C/∂θ_a) = FD(exact_BKM_formula(ρ))`` Key insight: Differentiating an exact gradient is more accurate than using an exact formula with approximate second derivatives. """ n = self.n_params hess = np.zeros((n, n)) # Compute Hessian by finite differences of θ-only gradient for b in range(n): # Perturbation in direction b e_b = np.zeros(n) e_b[b] = eps # Compute gradients at θ ± eps·e_b using exact θ-only formula _, grad_plus = self.marginal_entropy_constraint_theta_only(theta + e_b) _, grad_minus = self.marginal_entropy_constraint_theta_only(theta - e_b) # Central difference for column b hess[:, b] = (grad_plus - grad_minus) / (2 * eps) # Symmetrize to ensure exact symmetry (should already be symmetric to roundoff) hess = 0.5 * (hess + hess.T) return hess
[docs] def constraint_hessian(self, theta: np.ndarray, method: str = 'fd_theta_only', n_points: int = 100, eps: float = 1e-5) -> np.ndarray: """ Compute ∇²C, the Hessian of the constraint C(θ) = ∑ᵢ hᵢ(θ). This method dispatches to different implementations based on the method parameter. Parameters ---------- theta : ndarray, shape (n_params,) Natural parameters method : str, default='fd_theta_only' Method for Hessian computation. Options: - ``'fd_theta_only'``: FD of θ-only gradient (default). ~50-100× faster, ~10⁻⁸ error, recommended. - ``'duhamel'``: FD of Duhamel drho (legacy, slow, ~10⁻⁶ error). - ``'sld'``: Analytic formula using SLD (legacy, fast but ~10% error). n_points : int, default=100 Quadrature points for Duhamel (ignored for other methods) eps : float, default=1e-5 Finite difference step size Returns ------- hessian : ndarray, shape (n_params, n_params) Constraint Hessian ∇²C (symmetric matrix) Notes ----- The default 'fd_theta_only' method uses finite differences of the exact θ-only gradient, achieving better accuracy and much better performance than the legacy methods which use exact formulas with approximate inputs. """ # Dispatch to appropriate implementation if method == 'fd_theta_only': return self.constraint_hessian_fd_theta_only(theta, eps=eps) # Legacy methods: materialize ∂²ρ/∂θ_a∂θ_b for each (a,b) from qig.core import partial_trace rho = self.rho_from_theta(theta) I_full = np.eye(self.D, dtype=complex) # Compute ∂ρ/∂θ for all parameters # Use the same method as for ∂²ρ for consistency drho_method = 'duhamel' if method == 'duhamel' else 'sld' drho_dtheta = [] for a in range(self.n_params): drho_dtheta.append(self.rho_derivative(theta, a, method=drho_method, n_points=n_points)) # Initialise Hessian hessian = np.zeros((self.n_params, self.n_params)) # Sum over all marginals for i in range(self.n_sites): # Compute marginal ρᵢ rho_i = partial_trace(rho, self.dims, keep=i) d_i = rho_i.shape[0] I_i = np.eye(d_i, dtype=complex) # Eigendecomposition of ρᵢ eigvals_i, eigvecs_i = eigh(rho_i) eigvals_i = np.maximum(eigvals_i.real, 1e-14) # Compute log(ρᵢ) log_eigvals_i = np.log(eigvals_i) log_rho_i = eigvecs_i @ np.diag(log_eigvals_i) @ eigvecs_i.conj().T # Compute ∂ρᵢ/∂θ for all parameters drho_i_dtheta = [] for a in range(self.n_params): drho_i_dtheta.append(partial_trace(drho_dtheta[a], self.dims, keep=i)) # Compute ∂²hᵢ/∂θ_a∂θ_b for all a, b for a in range(self.n_params): for b in range(a, self.n_params): # Only upper triangle (symmetric) # Term 1: -Tr(∂²ρᵢ/∂θ_a∂θ_b (I + log ρᵢ)) # We need ∂²ρᵢ/∂θ_a∂θ_b if method == 'duhamel': # High-precision: numerical differentiation of Duhamel # Gives ~0.5-2.6% error (30-100× better than SLD) d2rho_dtheta_ab = self.rho_second_derivative( theta, a, b, method='numerical_duhamel', n_points=n_points, eps=eps ) else: # method == 'sld' # Fast analytic: SLD-based formula # Gives ~8-12% error but much faster # From ∂ρ/∂θ_a = (1/2)[ρ(F_a - ⟨F_a⟩I) + (F_a - ⟨F_a⟩I)ρ]: # ∂²ρ/∂θ_a∂θ_b = (1/2)[∂ρ/∂θ_b (F_a - ⟨F_a⟩I) + (F_a - ⟨F_a⟩I) ∂ρ/∂θ_b] # - ρ Cov_sym(F_b, F_a) F_a = self.operators[a] F_b = self.operators[b] mean_Fa = np.trace(rho @ F_a).real mean_Fb = np.trace(rho @ F_b).real F_a_centered = F_a - mean_Fa * I_full # Symmetrized covariance cov_sym = 0.5 * (np.trace(rho @ F_b @ F_a).real + np.trace(rho @ F_a @ F_b).real) - mean_Fb * mean_Fa # ∂²ρ/∂θ_a∂θ_b (SLD-based) d2rho_dtheta_ab = (0.5 * (drho_dtheta[b] @ F_a_centered + F_a_centered @ drho_dtheta[b]) - cov_sym * rho) # Partial trace to get ∂²ρᵢ/∂θ_a∂θ_b d2rho_i_dtheta_ab = partial_trace(d2rho_dtheta_ab, self.dims, keep=i) # Term 1 term1 = -np.trace(d2rho_i_dtheta_ab @ (I_i + log_rho_i)).real # Term 2: -Tr(∂ρᵢ/∂θ_a ∂(log ρᵢ)/∂θ_b) # Compute ∂(log ρᵢ)/∂θ_b using Daleckii-Krein formula # Transform ∂ρᵢ/∂θ_b to eigenbasis of ρᵢ drho_i_b_eigenbasis = eigvecs_i.conj().T @ drho_i_dtheta[b] @ eigvecs_i # Apply Daleckii-Krein formula element-wise dlog_rho_i_eigenbasis = np.zeros_like(drho_i_b_eigenbasis) for ii in range(d_i): for jj in range(d_i): if ii == jj: # Diagonal: ∂(log ρᵢ)/∂θ_b = (∂ρᵢ/∂θ_b) / ρᵢ dlog_rho_i_eigenbasis[ii, jj] = ( drho_i_b_eigenbasis[ii, jj] / eigvals_i[ii] ) else: # Off-diagonal: Daleckii-Krein formula log_diff = log_eigvals_i[ii] - log_eigvals_i[jj] p_diff = eigvals_i[ii] - eigvals_i[jj] if np.abs(p_diff) > 1e-10: dlog_rho_i_eigenbasis[ii, jj] = ( drho_i_b_eigenbasis[ii, jj] * log_diff / p_diff ) else: # Limit as p_i → p_j: (log p_i - log p_j)/(p_i - p_j) → 1/p_i dlog_rho_i_eigenbasis[ii, jj] = ( drho_i_b_eigenbasis[ii, jj] / eigvals_i[ii] ) # Transform back to original basis dlog_rho_i_dtheta_b = (eigvecs_i @ dlog_rho_i_eigenbasis
[docs] @ eigvecs_i.conj().T) # Term 2 term2 = -np.trace(drho_i_dtheta[a] @ dlog_rho_i_dtheta_b).real # ∂²hᵢ/∂θ_a∂θ_b d2h_i = term1 + term2 # Add to Hessian (symmetric, so fill both (a,b) and (b,a)) hessian[a, b] += d2h_i if a != b: hessian[b, a] += d2h_i return hessian
def lagrange_multiplier_gradient(self, theta: np.ndarray, method: str = 'sld', n_points: int = 100) -> np.ndarray: """ Compute ∇ν, the gradient of the Lagrange multiplier ν(θ). From the paper (equations 831-832), the gradient components are: ``∂ν/∂θ_j = (1/||a||²) [a^T G e_j + a^T (∇G)[θ] e_j + (∇a)_j^T G θ - 2ν a^T (∇a)_j]`` where: - ``ν = (a^T G θ)/(a^T a)`` is the Lagrange multiplier - ``a = ∇C = ∇(∑h_i)`` is the constraint gradient - ``G`` = Fisher information (BKM metric) - ``(∇G)[θ]`` = third cumulant tensor contracted with θ - ``∇a = ∇²C`` is the constraint Hessian Parameters ---------- theta : ndarray, shape (n_params,) Natural parameters method : str, default='sld' 'sld' or 'duhamel' for derivative precision n_points : int, default=100 Quadrature points for Duhamel (ignored for 'sld') Returns ------- grad_nu : ndarray, shape (n_params,) Gradient ∇ν Notes ----- Uses all the high-precision components from Steps 1-3: - BKM metric G (spectral formula) - Third cumulant (∇G)[θ] (perturbation theory) - Constraint Hessian ∇²C (Duhamel for high precision) """ # Get constraint gradient a = ∇C and constraint value (use optimized default) C, a = self.marginal_entropy_constraint(theta) # Uses theta_only by default # Get BKM metric G G = self.fisher_information(theta) # Compute Lagrange multiplier ν = (a^T G θ)/(||a||²) a_norm_sq = np.dot(a, a) nu = np.dot(a, G @ theta) / a_norm_sq # Get third cumulant contraction (∇G)[θ] (use optimized default) third_cumulant_contracted = self.third_cumulant_contraction(theta) # Uses fd by default # Get constraint Hessian ∇²C = ∇a (use optimized default) hessian_C = self.constraint_hessian(theta) # Uses fd_theta_only by default # Compute gradient ∇ν for each parameter j grad_nu = np.zeros(self.n_params) for j in range(self.n_params): # e_j is the j-th standard basis vector (handled implicitly) # Term 1: a^T G e_j = (G^T a)_j = (Ga)_j (since G is symmetric) term1 = (G @ a)[j] # Term 2: a^T (∇G)[θ] e_j = [a^T (∇G)[θ]]_j # This is the j-th component of a^T times the third cumulant contraction term2 = np.dot(a, third_cumulant_contracted[:, j]) # Term 3: (∇a)_j^T G θ # (∇a)_j is the j-th column of the Hessian grad_a_j = hessian_C[:, j] term3 = np.dot(grad_a_j, G @ theta) # Term 4: -2ν a^T (∇a)_j term4 = -2 * nu * np.dot(a, grad_a_j) # Combine all terms grad_nu[j] = (term1 + term2 + term3 + term4) / a_norm_sq return grad_nu
[docs] def jacobian(self, theta: np.ndarray, method: str = 'duhamel', n_points: int = 100) -> np.ndarray: """ Compute the Jacobian M = ∂F/∂θ of the constrained dynamics. From the paper (equations 824-827): F(θ) = -G(θ)θ + ν(θ)a(θ) M = ∂F/∂θ = -G - (∇G)[θ] + ν∇²C + a(∇ν)^T For systems with local operators only (no entanglement): - Structural identity Gθ = -a holds - ν = -1 always, ∇ν = 0 - Simplifies to: M = -G - (∇G)[θ] - ∇²C For systems with entangling operators (pair basis): - Structural identity BROKEN: Gθ ≠ -a - ν ≠ -1, ∇ν ≠ 0 - Must use full formula: M = -G - (∇G)[θ] + ν∇²C + a(∇ν)^T Parameters ---------- theta : ndarray, shape (n_params,) Natural parameters method : str, default='duhamel' 'sld' or 'duhamel' for derivative precision Changed default to 'duhamel' for better accuracy with entangled states n_points : int, default=100 Quadrature points for Duhamel (ignored for 'sld') Returns ------- M : ndarray, shape (n_params, n_params) Jacobian matrix Notes ----- This is the full Jacobian for GENERIC dynamics: θ̇ = F(θ) = -Gθ + νa The degeneracy of M determines the geometry of the constraint manifold and is central to the paper's analysis of the inaccessible game. """ # Get all components (use optimized defaults for each) G = self.fisher_information(theta) C, a = self.marginal_entropy_constraint(theta) # Uses theta_only by default third_cumulant = self.third_cumulant_contraction(theta) # Uses fd by default hessian_C = self.constraint_hessian(theta) # Uses fd_theta_only by default # Compute Lagrange multiplier Gtheta = G @ theta nu = np.dot(a, Gtheta) / np.dot(a, a) # Compute Lagrange multiplier gradient grad_nu = self.lagrange_multiplier_gradient(theta, method=method, n_points=n_points) # Assemble full Jacobian: M = -G - (∇G)[θ] + ν∇²C + a(∇ν)^T M = -G - third_cumulant + nu * hessian_C + np.outer(a, grad_nu) return M
[docs] def symmetric_part(self, theta: np.ndarray, method: str = 'duhamel', n_points: int = 100) -> np.ndarray: """ Compute symmetric part S of the flow Jacobian M. The GENERIC decomposition splits M into symmetric and antisymmetric parts: M = S + A, where S = (M + M^T)/2 The symmetric part S generates the irreversible (dissipative) dynamics. Parameters ---------- theta : np.ndarray Natural parameters method : str Method for computing Jacobian n_points : int Number of points for numerical integration Returns ------- S : np.ndarray, shape (n_params, n_params) Symmetric part of Jacobian, satisfies S = S^T Notes ----- The symmetric part satisfies key degeneracy conditions: - S @ a ≈ 0, where a = ∇C is the constraint gradient - θ^T S θ ≥ 0 (entropy production non-negative on tangent space) See Also -------- antisymmetric_part : Antisymmetric part of Jacobian verify_degeneracy_conditions : Verify GENERIC structure """ M = self.jacobian(theta, method=method, n_points=n_points) S = 0.5 * (M + M.T) return S
[docs] def antisymmetric_part(self, theta: np.ndarray, method: str = 'duhamel', n_points: int = 100) -> np.ndarray: """ Compute antisymmetric part A of the flow Jacobian M. The GENERIC decomposition splits M into symmetric and antisymmetric parts: M = S + A, where A = (M - M^T)/2 The antisymmetric part A generates the reversible (Hamiltonian) dynamics. Parameters ---------- theta : np.ndarray Natural parameters method : str Method for computing Jacobian n_points : int Number of points for numerical integration Returns ------- A : np.ndarray, shape (n_params, n_params) Antisymmetric part of Jacobian, satisfies A = -A^T Notes ----- The antisymmetric part satisfies key degeneracy conditions: - A @ (-G @ theta) ≈ 0, where -G @ theta = ∇H is the entropy gradient The antisymmetric part encodes the effective Hamiltonian through: A_ab θ_b = Σ_c f_abc η_c, where η are the Hamiltonian coefficients. See Also -------- symmetric_part : Symmetric part of Jacobian verify_degeneracy_conditions : Verify GENERIC structure """ M = self.jacobian(theta, method=method, n_points=n_points) A = 0.5 * (M - M.T) return A
[docs] def verify_degeneracy_conditions(self, theta: np.ndarray, method: str = 'duhamel', n_points: int = 100, tol: float = 1e-6) -> Dict[str, Any]: """ Verify degeneracy conditions for GENERIC structure. The GENERIC structure requires that the symmetric and antisymmetric parts satisfy specific degeneracy conditions related to the constraint and entropy gradients. Parameters ---------- theta : np.ndarray Natural parameters method : str Method for computing Jacobian n_points : int Number of points for numerical integration tol : float Tolerance for degeneracy conditions Returns ------- diagnostics : dict Dictionary containing: - 'S': Symmetric part - 'A': Antisymmetric part - 'constraint_gradient': a = ∇C - 'entropy_gradient': ∇H = -G @ theta - 'S_annihilates_constraint': ||S @ a|| - 'A_annihilates_entropy_gradient': ||A @ (-G @ theta)|| - 'entropy_production': θ^T S θ - 'S_symmetric_error': ||S - S^T|| - 'A_antisymmetric_error': ||A + A^T|| - 'reconstruction_error': ||M - (S + A)|| - 'all_passed': bool indicating if all checks passed Notes ----- Degeneracy conditions (should hold within tolerance): 1. S @ a ≈ 0: Symmetric part annihilates constraint gradient 2. A @ ∇H ≈ 0: Antisymmetric part annihilates entropy gradient 3. θ^T S θ ≥ 0: Entropy production non-negative 4. S = S^T: Symmetry (machine precision) 5. A = -A^T: Antisymmetry (machine precision) 6. M = S + A: Reconstruction (machine precision) """ from qig.validation import ValidationReport # Compute components M = self.jacobian(theta, method=method, n_points=n_points) S = self.symmetric_part(theta, method=method, n_points=n_points) A = self.antisymmetric_part(theta, method=method, n_points=n_points) # Get constraint gradient (using marginal entropies constraint) marginals = marginal_entropies(self.rho_from_theta(theta), [self.d] * (self.n_pairs if self.pair_basis else self.n_sites)) # Gradient of constraint C(θ) = Σ h_i # Use finite differences for gradient eps = 1e-7 a = np.zeros(len(theta)) C0 = np.sum(marginals) for i in range(len(theta)): theta_plus = theta.copy() theta_plus[i] += eps rho_plus = self.rho_from_theta(theta_plus) marginals_plus = marginal_entropies(rho_plus, [self.d] * (self.n_pairs if self.pair_basis else self.n_sites)) C_plus = np.sum(marginals_plus) a[i] = (C_plus - C0) / eps # Entropy gradient: ∇H = -G @ theta G = self.fisher_information(theta) entropy_gradient = -G @ theta # Compute degeneracy violations S_a = S @ a A_entropy_grad = A @ entropy_gradient S_annihilates_constraint = np.linalg.norm(S_a) A_annihilates_entropy_gradient = np.linalg.norm(A_entropy_grad) # Entropy production entropy_production = theta @ S @ theta # Symmetry/antisymmetry errors S_symmetric_error = np.max(np.abs(S - S.T)) A_antisymmetric_error = np.max(np.abs(A + A.T)) # Reconstruction error reconstruction_error = np.max(np.abs(M - (S + A))) # Check all conditions all_passed = ( S_annihilates_constraint < tol and A_annihilates_entropy_gradient < tol and entropy_production >= -tol and S_symmetric_error < 1e-14 and A_antisymmetric_error < 1e-14 and reconstruction_error < 1e-14 ) diagnostics = { 'S': S, 'A': A, 'constraint_gradient': a, 'entropy_gradient': entropy_gradient, 'S_annihilates_constraint': S_annihilates_constraint, 'A_annihilates_entropy_gradient': A_annihilates_entropy_gradient, 'entropy_production': entropy_production, 'S_symmetric_error': S_symmetric_error, 'A_antisymmetric_error': A_antisymmetric_error, 'reconstruction_error': reconstruction_error, 'all_passed': all_passed, 'tolerance': tol } return diagnostics
def _grad_psi(self, theta: np.ndarray) -> np.ndarray: """ Compute ∇ψ(θ), the analytical gradient of the cumulant generating function ψ(θ). Since ψ(θ) = log Tr(exp(K(θ))) where K(θ) = ∑ θ_a F_a, differentiate directly: ∂ψ/∂θ_a = ∂/∂θ_a [log Tr(exp(K))] = [1/Tr(exp(K))] * Tr( ∂/∂θ_a exp(K) ) Since ∂/∂θ_a exp(K) = exp(K) * F_a (in the sense of matrix multiplication), we get: ∂ψ/∂θ_a = Tr( exp(K) F_a ) / Tr(exp(K)) This computes the gradient directly from the cumulant generating function without materializing ρ(θ) as an intermediate. Parameters ---------- theta : ndarray Natural parameters Returns ------- grad_psi : ndarray, shape (n_params,) ∇ψ(θ), the gradient of the cumulant generating function """ # Compute K(θ) = ∑ θ_a F_a K = sum(theta_a * F_a for theta_a, F_a in zip(theta, self.operators)) # Shift for stability. Since exp(K) = exp(c) exp(K-cI), the scalar cancels # in Tr(exp(K) F_a) / Tr(exp(K)). lambda_max = float(np.max(np.linalg.eigvalsh(K).real)) K_shift = K - lambda_max * np.eye(self.D, dtype=complex) exp_K = expm(K_shift) # Compute normalization Z = Tr(exp(K)) Z = np.trace(exp_K) # Compute ∇ψ(θ)_a = Tr(exp(K) F_a) / Z for each a return np.array([np.trace(exp_K @ F_a).real / Z.real for F_a in self.operators])
[docs] def von_neumann_entropy(self, theta: np.ndarray) -> float: """ Compute the von Neumann entropy. For exponential families ρ(θ) = exp(K(θ) - ψ(θ)), we use the fundamental identity: H(ρ) = ψ(θ) - θ^T ∇ψ(θ) where ∇ψ(θ) is the analytical gradient of the cumulant generating function ψ(θ). This directly uses the exponential family structure and is more numerically stable than eigendecomposition for states within the exponential family manifold. Parameters ---------- theta : ndarray Natural parameters Returns ------- float Von Neumann entropy in nats """ # Use exponential family identity: H(ρ) = ψ(θ) - θ^T ∇ψ(θ) psi_theta = self.psi(theta) grad_psi = self._grad_psi(theta) entropy = psi_theta - np.dot(theta, grad_psi) # Ensure non-negative (numerical precision issues) return max(0.0, float(entropy.real))
[docs] def mutual_information(self, theta: np.ndarray) -> float: """ Compute mutual information I = C - H where C = ∑h_i, H = S(ρ). For separable states: I = 0 For entangled states: I > 0 Maximum for Bell states: I = 2log(d) Parameters ---------- theta : ndarray Natural parameters Returns ------- float Mutual information in nats Notes ----- This only makes sense for pair-based systems. For local operators, I ≈ 0 always since those can only create separable states. """ rho = self.rho_from_theta(theta) h_marginals = marginal_entropies(rho, self.dims) C = np.sum(h_marginals) H = self.von_neumann_entropy(theta) return C - H
[docs] def purity(self, theta: np.ndarray) -> float: """ Compute purity Tr(ρ²). Pure states: Tr(ρ²) = 1 Maximally mixed: Tr(ρ²) = 1/D Parameters ---------- theta : ndarray Natural parameters Returns ------- float Purity, between 1/D and 1 """ rho = self.rho_from_theta(theta) return np.trace(rho @ rho).real
# ========================================================================= # CIP-0008: σ-parametrised regularisation infrastructure # =========================================================================
[docs] def validate_sigma(self, sigma: np.ndarray) -> Tuple[bool, str]: """ Validate that σ is a valid density matrix of correct dimension. Parameters ---------- sigma : ndarray Matrix to validate Returns ------- is_valid : bool True if σ is a valid density matrix message : str "Valid" or error description """ D = self.D # Check shape if sigma.shape != (D, D): return False, f"Shape mismatch: expected ({D}, {D}), got {sigma.shape}" # Check Hermitian if not np.allclose(sigma, sigma.conj().T, atol=1e-10): return False, "σ must be Hermitian" # Check positive semidefinite eigvals = np.linalg.eigvalsh(sigma) if np.any(eigvals < -1e-10): return False, f"σ must be PSD, min eigenvalue: {eigvals.min():.2e}" # Check unit trace tr = np.trace(sigma).real if not np.isclose(tr, 1.0, atol=1e-10): return False, f"Tr(σ) must be 1, got {tr:.6f}" return True, "Valid"
[docs] def is_product_sigma( self, sigma: np.ndarray, tol: float = 1e-6 ) -> Tuple[bool, Optional[List[np.ndarray]]]: """ Check if σ has product structure σ = σ₁⊗σ₂⊗...⊗σₙ. Parameters ---------- sigma : ndarray Density matrix to check tol : float Tolerance for product structure detection Returns ------- is_product : bool True if σ is (approximately) a product state factors : list of ndarray or None If product, the individual σₖ matrices; otherwise None """ if self.n_pairs == 1: # Single pair is trivially a "product" return True, [sigma] # For multi-pair, check if σ can be factorised # This is expensive in general - use partial trace to check consistency D_pair = self.d ** 2 # Extract marginals for each pair marginals = [] for k in range(self.n_pairs): # Trace out all pairs except k sigma_k = self._partial_trace_to_pair(sigma, k) marginals.append(sigma_k) # Reconstruct product state from marginals sigma_product = marginals[0] for k in range(1, self.n_pairs): sigma_product = np.kron(sigma_product, marginals[k]) # Check if reconstruction matches if np.allclose(sigma, sigma_product, atol=tol): return True, marginals return False, None
def _partial_trace_to_pair(self, sigma: np.ndarray, pair_idx: int) -> np.ndarray: """Trace out all pairs except pair_idx, returning d²×d² density matrix.""" D_pair = self.d ** 2 n = self.n_pairs if n == 1: return sigma # Nothing to trace # Reshape to tensor with legs for each pair # Shape: (D_pair,) * n for bra, then (D_pair,) * n for ket sigma_tensor = sigma.reshape([D_pair] * (2 * n)) # Use einsum to trace out all pairs except pair_idx # Build index strings: bra indices 0..n-1, ket indices n..2n-1 # Pairs to trace get same index letter; pair to keep gets distinct letters bra_indices = [] ket_indices = [] trace_letter = ord('a') keep_bra = chr(ord('z') - 1) # 'y' keep_ket = chr(ord('z')) # 'z' for k in range(n): if k == pair_idx: bra_indices.append(keep_bra) ket_indices.append(keep_ket) else: letter = chr(trace_letter) bra_indices.append(letter) ket_indices.append(letter) # Same letter = trace over this pair trace_letter += 1 input_str = ''.join(bra_indices + ket_indices) output_str = keep_bra + keep_ket einsum_str = f"{input_str}->{output_str}" result = np.einsum(einsum_str, sigma_tensor) return result.reshape(D_pair, D_pair)
[docs] def detect_sigma_structure(self, sigma: np.ndarray) -> str: """ Detect structure of σ for efficiency optimisation. Parameters ---------- sigma : ndarray Regularisation matrix Returns ------- structure : str One of: 'isotropic', 'product', 'pure', 'general' """ D = self.D # Check if isotropic (I/D) if np.allclose(sigma, np.eye(D) / D, atol=1e-10): return 'isotropic' # Check if product state (for multi-pair) if self.n_pairs > 1: is_prod, _ = self.is_product_sigma(sigma) if is_prod: return 'product' # Check if rank-1 (pure state) eigvals = np.linalg.eigvalsh(sigma) n_nonzero = np.sum(eigvals > 1e-10) if n_nonzero == 1: return 'pure' return 'general'
[docs] def regularise_pure_state( self, psi: np.ndarray, epsilon: float, sigma: Optional[np.ndarray] = None, ) -> np.ndarray: """ Create regularised density matrix from pure state. ρ_ε = (1-ε)|ψ⟩⟨ψ| + ε σ Parameters ---------- psi : ndarray Pure state vector (length D) epsilon : float Regularisation strength, must be in (0, 1) sigma : ndarray, optional Regularisation direction. Default: I/D (isotropic) Returns ------- rho_epsilon : ndarray Regularised density matrix (D×D) """ D = self.D # Validate psi if psi.shape != (D,): raise ValueError(f"psi must have length {D}, got {psi.shape}") # Normalise psi if needed norm = np.linalg.norm(psi) if not np.isclose(norm, 1.0, atol=1e-10): psi = psi / norm # Validate epsilon if not 0 < epsilon < 1: raise ValueError(f"epsilon must be in (0, 1), got {epsilon}") # Default sigma is isotropic if sigma is None: sigma = np.eye(D, dtype=complex) / D else: # Validate sigma is_valid, msg = self.validate_sigma(sigma) if not is_valid: raise ValueError(f"Invalid sigma: {msg}") # Create regularised state rho_pure = np.outer(psi, psi.conj()) rho_epsilon = (1 - epsilon) * rho_pure + epsilon * sigma return rho_epsilon
def _bell_parameters_product_sigma( self, eps_val: float, log_eps: float, sigma_per_pair: List[np.ndarray], bell_indices: Optional[List[int]] = None, ) -> np.ndarray: """ Efficient θ computation for product σ = σ₁⊗...⊗σₙ. For each pair k, the marginal is: ρₖ_ε = (1-ε)|Φₖ⟩⟨Φₖ| + ε σₖ We compute log(ρₖ_ε) via small d²×d² eigendecomposition, then project onto the per-pair operators. Complexity: O(n × d⁶) instead of O(d^(6n)) Parameters ---------- eps_val : float Regularisation ε value log_eps : float log(ε) for numerical stability sigma_per_pair : list of ndarray List of n density matrices, each d²×d² for one pair bell_indices : list of int, optional Which Bell state (k=0,...,d-1) to use for each pair. Default: all zeros (standard Bell state |Φ₀⟩). Returns ------- theta : ndarray Natural parameters [θ₁, θ₂, ..., θₙ] """ from .pair_operators import pair_basis_generators, bell_state d = self.d D_pair = d ** 2 n_ops_per_pair = D_pair ** 2 - 1 # d⁴ - 1 # Get single-pair generators single_pair_ops = pair_basis_generators(d) # Handle bell_indices if bell_indices is None: bell_indices = [0] * self.n_pairs # Compute θ for each pair theta = np.zeros(self.n_params) for k in range(self.n_pairs): sigma_k = sigma_per_pair[k] # Get Bell state for this pair (may differ per pair via bell_indices) psi_bell_k = bell_state(d, bell_indices[k]) rho_bell_k = np.outer(psi_bell_k, psi_bell_k.conj()) # Form ρₖ_ε = (1-ε)|Φₖ⟩⟨Φₖ| + ε σₖ rho_k_eps = (1 - eps_val) * rho_bell_k + eps_val * sigma_k # Ensure Hermitian rho_k_eps = (rho_k_eps + rho_k_eps.conj().T) / 2 # Compute log via eigendecomposition (small d²×d² matrix) eigvals, U = eigh(rho_k_eps) eigvals = np.maximum(eigvals, np.finfo(float).tiny) log_eigvals = np.log(eigvals) log_rho_k = U @ np.diag(log_eigvals) @ U.conj().T # Project onto per-pair operators # θₐ = Tr(log(ρₖ) Fₐ) / Tr(Fₐ²) start_idx = k * n_ops_per_pair for a, F_a in enumerate(single_pair_ops): numerator = np.real(np.trace(log_rho_k @ F_a)) denominator = np.real(np.trace(F_a @ F_a)) if denominator > 0: theta[start_idx + a] = numerator / denominator return theta
[docs] def get_bell_state_parameters( self, epsilon: float = 1e-6, log_epsilon: Optional[float] = None, sigma: Optional[np.ndarray] = None, sigma_per_pair: Optional[List[np.ndarray]] = None, bell_indices: Optional[List[int]] = None, ) -> np.ndarray: """ Get natural parameters θ corresponding to a regularised Bell state. A Bell state is a pure state (rank 1), which lies at the boundary of the exponential family where natural parameters θ → -∞. For regularised state: ``ρ_ε = (1-ε)|Φ⟩⟨Φ| + ε σ`` We compute θ by solving: ``ρ_ε = exp(Σ θₐFₐ - ψ(θ))`` This gives: ``θₐ ∝ Tr(log(ρ_ε) Fₐ)`` Parameters ---------- epsilon : float, default=1e-6 Regularisation parameter. Smaller epsilon → closer to pure Bell state (more negative θ). Must be > 0 to avoid singularities. log_epsilon : float, optional If provided, overrides ``epsilon`` via log ε = log_epsilon. This is numerically convenient when exploring very small ε. sigma : ndarray, optional Full D×D regularisation matrix. Any valid density matrix. Use for entangled σ (inter-pair correlations in perturbation). If None and sigma_per_pair is None, uses I/D (isotropic). bell_indices : list of int, optional Which Bell state (k=0,...,d-1) to use for each pair. Default: all zeros (standard Bell state ``|Φ₀⟩``). Allows exploring different pure state origins. sigma_per_pair : list of ndarray, optional List of n density matrices, each d²×d² for one pair. Constructs σ = σ₁⊗σ₂⊗...⊗σₙ (product structure). Efficient O(n) computation preserved. Only valid for multi-pair systems (n_pairs > 1). Returns ------- theta : ndarray Natural parameters for the regularized Bell state. Many components will be large and negative (approaching -∞ for pure state). Raises ------ ValueError If both sigma and sigma_per_pair are provided. If sigma_per_pair has wrong length or invalid matrices. Notes ----- The regularisation matrix σ encodes the "direction of approach" to the pure-state boundary (see CIP-0008 and entropy_time_paths.ipynb): - Different σ = different "meridians" from the north pole - Isotropic σ = I/D gives the "boring" symmetric departure - Anisotropic σ reveals the tangent cone of possible departures For the exponential family ρ(θ) = exp(H(θ))/Z where H = Σ θₐFₐ, we have: log(ρ) = H - log(Z)·I Since Tr(Fₐ) = 0 for our operators, we get: θₐ = Tr(log(ρ) Fₐ) / Tr(FₐFₐ) **Multi-pair note**: For n_pairs > 1, our operator basis is the direct sum of per-pair su(d²) algebras, NOT the full su(D) algebra. The regularised Bell state may have components outside this subspace. The returned θ is the projection onto our subspace, which is correct for the inaccessible game dynamics (which stay in this subspace). Reconstruction via ``rho_from_theta(θ)`` will NOT exactly match the target ρ_ε, but the dynamics are correct. """ if not self.pair_basis: raise ValueError("Bell states only defined for pair_basis=True") # Validate sigma options if sigma is not None and sigma_per_pair is not None: raise ValueError("Cannot specify both sigma and sigma_per_pair") # Handle sigma_per_pair for multi-pair systems (efficient O(n × d⁶) path) if sigma_per_pair is not None: if self.n_pairs == 1: raise ValueError("sigma_per_pair only valid for n_pairs > 1") if len(sigma_per_pair) != self.n_pairs: raise ValueError( f"sigma_per_pair must have {self.n_pairs} matrices, got {len(sigma_per_pair)}" ) # Validate each per-pair sigma (fast: O(n × d⁶) not O(D³)) D_pair = self.d ** 2 for k, sigma_k in enumerate(sigma_per_pair): if sigma_k.shape != (D_pair, D_pair): raise ValueError( f"sigma_per_pair[{k}] must be {D_pair}×{D_pair}, got {sigma_k.shape}" ) # Quick validation (skip full eigenvalue check for speed) if not np.allclose(sigma_k, sigma_k.conj().T, atol=1e-10): raise ValueError(f"sigma_per_pair[{k}] must be Hermitian") if not np.isclose(np.trace(sigma_k), 1.0, atol=1e-10): raise ValueError(f"sigma_per_pair[{k}] must have unit trace") # Go directly to efficient per-pair computation (skip full sigma build!) if log_epsilon is not None: log_eps = float(log_epsilon) else: log_eps = float(np.log(epsilon)) eps_val = float(np.exp(log_eps)) return self._bell_parameters_product_sigma(eps_val, log_eps, sigma_per_pair, bell_indices) # Work with log ε directly for numerical stability near the boundary. if log_epsilon is not None: log_eps = float(log_epsilon) if not np.isfinite(log_eps): raise ValueError("log_epsilon must be a finite real number") else: if epsilon <= 0: raise ValueError("epsilon must be > 0 (pure Bell state has θ → -∞)") log_eps = float(np.log(epsilon)) # IMPORTANT: # - For σ = I/D (isotropic), we can keep working in log-space even when # log_eps is far below float64 underflow (≈ -708). This avoids artificial # saturation in θ when exploring extreme log_eps. # - For more general σ (including per-pair σₖ), we must materialize # (1-ε)ρ + εσ in float64, so ε will underflow; those paths clamp. D = self.D # Get Bell state (product of n_pairs Bell states) psi_bell = product_of_bell_states(self.n_pairs, self.d, bell_indices=bell_indices) rho_bell = np.outer(psi_bell, psi_bell.conj()) # Determine sigma structure and compute log(ρ_ε) accordingly if sigma is None: # Isotropic case: σ = I/D (efficient analytic formula) sigma_structure = 'isotropic' else: # Validate sigma is_valid, msg = self.validate_sigma(sigma) if not is_valid: raise ValueError(f"Invalid sigma: {msg}") sigma_structure = self.detect_sigma_structure(sigma) if sigma_structure == 'isotropic': # Efficient analytic computation for σ = I/D # ρ_ε = (1-ε)|Ψ⟩⟨Ψ| + ε I/D has eigenvalues: # λ₀ = 1 - ε + ε/D (the Bell state direction) # λ_⊥ = ε/D (all D-1 orthogonal directions) # This works for any n_pairs! eigvals_bell, U = eigh(rho_bell) idx_max = int(np.argmax(eigvals_bell.real)) # ε may underflow for very negative log_eps; in that limit λ₀→1 so # log(λ₀)→0, while log(λ_⊥)=log_eps-log(D) remains well-defined. min_log_eps = float(np.log(np.finfo(float).tiny)) eps_val = float(np.exp(log_eps)) if log_eps >= min_log_eps else 0.0 factor = eps_val * (1.0 - 1.0 / D) log_lambda0 = np.log1p(-factor) if factor < 1.0 else np.log(np.finfo(float).tiny) log_lambda_perp = log_eps - np.log(D) log_diag = np.full(D, log_lambda_perp, dtype=float) log_diag[idx_max] = log_lambda0 log_rho = U @ np.diag(log_diag) @ U.conj().T elif sigma_structure == 'product' and self.n_pairs > 1: # Product sigma: efficient per-pair computation! # For σ = σ₁⊗...⊗σₙ and |Ψ⟩ = |Φ⟩⊗...⊗|Φ⟩: # The marginal on pair k is: ρₖ_ε = (1-ε)|Φ⟩⟨Φ| + ε σₖ # We compute θ^(k) from log(ρₖ_ε) directly. # Complexity: O(n × d⁶) instead of O(d^(6n)) # If sigma_per_pair wasn't provided, extract marginals from sigma if sigma_per_pair is None: sigma_per_pair = [ self._partial_trace_to_pair(sigma, k) for k in range(self.n_pairs) ] # Need ε in float64 arithmetic in the per-pair path; clamp to avoid # underflow producing singular logs. min_log_eps = float(np.log(np.finfo(float).tiny)) log_eps_clamped = max(log_eps, min_log_eps) eps_val = float(np.exp(log_eps_clamped)) return self._bell_parameters_product_sigma( eps_val, log_eps, sigma_per_pair, bell_indices ) else: # General case: compute ρ_ε and take matrix log # This is O(D³) but handles arbitrary σ import warnings if sigma_structure == 'general' and self.n_pairs > 1: warnings.warn( f"Using general σ (structure: {sigma_structure}) with {self.n_pairs} pairs. " "This loses O(n) efficiency.", UserWarning ) # Need ε in float64 arithmetic; clamp to avoid underflow producing # a singular pure state and -inf in the log. min_log_eps = float(np.log(np.finfo(float).tiny)) log_eps_clamped = max(log_eps, min_log_eps) eps_val = float(np.exp(log_eps_clamped)) rho_eps = (1 - eps_val) * rho_bell + eps_val * sigma # Ensure Hermitian and compute log via eigendecomposition rho_eps = (rho_eps + rho_eps.conj().T) / 2 eigvals, U = eigh(rho_eps) # Clamp small eigenvalues for numerical stability eigvals = np.maximum(eigvals, np.finfo(float).tiny) log_eigvals = np.log(eigvals) log_rho = U @ np.diag(log_eigvals) @ U.conj().T # Extract natural parameters by projection onto operator basis # θₐ = Tr(log(ρ) Fₐ) / Tr(FₐFₐ) theta = np.zeros(self.n_params) for a, F_a in enumerate(self.operators): numerator = np.real(np.trace(log_rho @ F_a)) denominator = np.real(np.trace(F_a @ F_a)) if denominator > 0: theta[a] = numerator / denominator else: theta[a] = 0.0 return theta
__all__ = [ "pauli_basis", "gell_mann_matrices", "qutrit_basis", "create_operator_basis", "QuantumExponentialFamily", ]