"""
Quantum exponential family and BKM metric interface for the quantum inaccessible game.
This module contains:
- local operator bases (Pauli, Gell-Mann, generalised Gell-Mann);
- construction of the full operator basis {F_a};
- the `QuantumExponentialFamily` class providing ρ(θ), log-partition ψ(θ),
Fisher/BKM metric G(θ), and the marginal-entropy constraint.
"""
from typing import Tuple, List, Optional, Dict, Any
import numpy as np
from scipy.linalg import expm, eigh, logm
from qig.core import marginal_entropies, partial_trace
from qig.pair_operators import (
pair_basis_generators,
multi_pair_basis,
bell_state_density_matrix,
product_of_bell_states
)
# ============================================================================
# Operator Basis: Local Lie Algebra Generators
# ============================================================================
[docs]
def pauli_basis(site: int, n_sites: int) -> List[np.ndarray]:
"""
Create Pauli operator basis for site 'site' in an n_sites qubit system.
Returns [sigma_x, sigma_y, sigma_z] tensored with identity on other sites.
"""
# Define Pauli matrices
I = np.array([[1, 0], [0, 1]], dtype=complex)
X = np.array([[0, 1], [1, 0]], dtype=complex)
Y = np.array([[0, -1j], [1j, 0]], dtype=complex)
Z = np.array([[1, 0], [0, -1]], dtype=complex)
paulis = [X, Y, Z]
operators: List[np.ndarray] = []
for P in paulis:
# Build tensor product: I ⊗ ... ⊗ P ⊗ ... ⊗ I
op = None
for i in range(n_sites):
current = P if i == site else I
op = current if op is None else np.kron(op, current)
operators.append(op)
return operators
[docs]
def gell_mann_matrices() -> List[np.ndarray]:
"""
Return the 8 Gell-Mann matrices (generators of SU(3)).
"""
gm: List[np.ndarray] = []
# λ1 and λ2 (off-diagonal in 01 block)
gm.append(np.array([[0, 1, 0], [1, 0, 0], [0, 0, 0]], dtype=complex))
gm.append(np.array([[0, -1j, 0], [1j, 0, 0], [0, 0, 0]], dtype=complex))
# λ3 (diagonal in 01 block)
gm.append(np.array([[1, 0, 0], [0, -1, 0], [0, 0, 0]], dtype=complex))
# λ4 and λ5 (off-diagonal in 02 block)
gm.append(np.array([[0, 0, 1], [0, 0, 0], [1, 0, 0]], dtype=complex))
gm.append(np.array([[0, 0, -1j], [0, 0, 0], [1j, 0, 0]], dtype=complex))
# λ6 and λ7 (off-diagonal in 12 block)
gm.append(np.array([[0, 0, 0], [0, 0, 1], [0, 1, 0]], dtype=complex))
gm.append(np.array([[0, 0, 0], [0, 0, -1j], [0, 1j, 0]], dtype=complex))
# λ8 (diagonal)
gm.append(np.array([[1, 0, 0], [0, 1, 0], [0, 0, -2]], dtype=complex) / np.sqrt(3))
return gm
[docs]
def qutrit_basis(site: int, n_sites: int) -> List[np.ndarray]:
"""
Create Gell-Mann operator basis for site 'site' in n_sites qutrit system.
"""
I = np.eye(3, dtype=complex)
gm = gell_mann_matrices()
operators: List[np.ndarray] = []
for G in gm:
op = None
for i in range(n_sites):
current = G if i == site else I
op = current if op is None else np.kron(op, current)
operators.append(op)
return operators
[docs]
def create_operator_basis(n_sites: int, d: int) -> Tuple[list, list]:
"""
Create full operator basis {F_a} for quantum exponential family.
Parameters
-----------
n_sites : int
Number of sites
d : int
Local dimension (2 for qubits, 3 for qutrits, d>=2 in general)
"""
operators: List[np.ndarray] = []
labels: List[str] = []
if d == 2:
# Qubits: Pauli basis
pauli_names = ["X", "Y", "Z"]
for site in range(n_sites):
ops = pauli_basis(site, n_sites)
operators.extend(ops)
labels.extend([f"{name}_{site+1}" for name in pauli_names])
elif d == 3:
# Qutrits: Gell-Mann basis
for site in range(n_sites):
ops = qutrit_basis(site, n_sites)
operators.extend(ops)
labels.extend([f"λ{k+1}_{site+1}" for k in range(8)])
elif d >= 4:
# Higher dimensions: use generalised Gell-Mann matrices
for site in range(n_sites):
# Create d^2-1 Hermitian traceless operators
basis_ops: List[np.ndarray] = []
# Off-diagonal symmetric and antisymmetric
for i in range(d):
for j in range(i + 1, d):
# Symmetric: |i><j| + |j><i|
op = np.zeros((d, d), dtype=complex)
op[i, j] = 1
op[j, i] = 1
basis_ops.append(op)
# Antisymmetric: -i|i><j| + i|j><i|
op = np.zeros((d, d), dtype=complex)
op[i, j] = -1j
op[j, i] = 1j
basis_ops.append(op)
# Diagonal traceless
for k in range(d - 1):
op = np.zeros((d, d), dtype=complex)
for i in range(k + 1):
op[i, i] = 1
op[k + 1, k + 1] = -(k + 1)
op = op / np.sqrt(k + 1 + (k + 1) ** 2)
basis_ops.append(op)
# Tensor with identity on other sites
for op_local in basis_ops:
op_full = None
for s in range(n_sites):
current = op_local if s == site else np.eye(d, dtype=complex)
op_full = current if op_full is None else np.kron(op_full, current)
operators.append(op_full)
labels.append(f"H{len(operators)}_{site+1}")
else:
raise ValueError(f"Dimension d={d} must be >= 2.")
return operators, labels
# ============================================================================
# Quantum Exponential Family
# ============================================================================
[docs]
class QuantumExponentialFamily:
"""
Quantum exponential family: ρ(θ) = exp(∑ θ_a F_a - ψ(θ))
"""
[docs]
def __init__(self, n_sites: Optional[int] = None, d: int = 2,
n_pairs: Optional[int] = None, pair_basis: bool = False):
"""
Initialise quantum exponential family.
Parameters
-----------
n_sites : int, optional
Number of subsystems (for local operator basis)
d : int
Local dimension (2 for qubits, 3 for qutrits)
n_pairs : int, optional
Number of entangled pairs (for pair basis)
pair_basis : bool
If True, use su(d²) operators for entangled pairs
If False, use local operators (default)
Notes
-----
Two modes of operation:
1. Local operators (pair_basis=False, default):
- Specify n_sites: number of independent subsystems
- Uses operators like σ_x⊗I, I⊗σ_y (local Pauli/Gell-Mann)
- Can ONLY represent separable states
- Suitable for testing, but NOT for quantum game with entanglement
2. Pair operators (pair_basis=True):
- Specify n_pairs: number of entangled pairs
- Uses su(d²) generators acting on each pair
- Can represent entangled states (including Bell states)
- Required for proper quantum inaccessible game
- Fisher metric G is block-diagonal (one block per pair)
"""
self.pair_basis = pair_basis
if pair_basis:
# Pair-based operators for entangled pairs
if n_pairs is None:
raise ValueError("Must specify n_pairs when using pair_basis=True")
self.n_pairs = n_pairs
self.n_sites = 2 * n_pairs # Each pair has 2 subsystems
self.d = d
self.dims = [d] * self.n_sites
self.D = d**(2 * n_pairs) # Hilbert space: (d²)^n_pairs
# Create pair operator basis
self.operators, self.pair_indices = multi_pair_basis(n_pairs, d)
self.labels = [f"F{a}_pair{k}" for k in range(n_pairs)
for a in range(d**4 - 1)]
self.n_params = len(self.operators)
print(f"Initialised {n_pairs}-pair system with d={d} (pair basis)")
print(f"Number of subsystems: {self.n_sites} (2 per pair)")
print(f"Hilbert space dimension: {self.D}")
print(f"Number of parameters: {self.n_params} = {n_pairs} × {d**4-1}")
else:
# Local operators (original mode)
if n_sites is None:
raise ValueError("Must specify n_sites when using pair_basis=False")
self.n_sites = n_sites
self.d = d
self.dims = [d] * n_sites
self.D = d**n_sites
self.n_pairs = None
self.pair_indices = None
# Create operator basis
self.operators, self.labels = create_operator_basis(n_sites, d)
self.n_params = len(self.operators)
print(f"Initialised {n_sites}-site system with d={d} (local basis)")
print(f"Hilbert space dimension: {self.D}")
print(f"Number of parameters: {self.n_params}")
def _lift_to_full_space(self, op_i: np.ndarray, site_i: int) -> np.ndarray:
"""
Lift operator on subsystem i to full Hilbert space.
This is the adjoint of partial_trace(): it embeds a marginal operator
into the full space by tensoring with identity operators on other subsystems.
Result: I₀ ⊗ ... ⊗ I_{i-1} ⊗ op_i ⊗ I_{i+1} ⊗ ... ⊗ I_{n-1}
Parameters
----------
op_i : ndarray, shape (d_i, d_i)
Operator on subsystem i
site_i : int
Which subsystem (0 to n_sites-1)
Returns
-------
op_full : ndarray, shape (D, D)
Operator on full Hilbert space
"""
result = None
for j, d_j in enumerate(self.dims):
if j == site_i:
current = op_i
else:
current = np.eye(d_j, dtype=complex)
result = current if result is None else np.kron(result, current)
return result
def _bkm_kernel(self, rho: np.ndarray) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
"""
Compute BKM (Bogoliubov-Kubo-Mori) kernel from density matrix eigenvalues.
The BKM kernel for the quantum Fisher information metric is:
k(p_i, p_j) = (p_i - p_j) / (log p_i - log p_j) for i ≠ j
k(p_i, p_i) = p_i for i = j
This kernel appears in the BKM inner product:
⟨A, B⟩_BKM = ∑_{i,j} k(p_i, p_j) A[i,j] conj(B[i,j])
where A, B are operators in the eigenbasis of ρ.
Parameters
----------
rho : ndarray, shape (D, D)
Density matrix
Returns
-------
k : ndarray, shape (D, D)
BKM kernel matrix
p : ndarray, shape (D,)
Eigenvalues of ρ (clipped to ≥ 1e-14)
U : ndarray, shape (D, D)
Eigenvectors of ρ (columns)
"""
from scipy.linalg import eigh
p, U = eigh(rho)
p = np.clip(p.real, 1e-14, None)
# Build kernel matrix
p_i = p[:, None]
p_j = p[None, :]
diff = p_i - p_j
log_diff = np.log(p_i) - np.log(p_j)
k = np.zeros_like(diff)
# For non-degenerate eigenvalues: k(p_i, p_j) = (p_i - p_j)/(log p_i - log p_j)
non_degenerate = np.abs(diff) > 1e-14
k[non_degenerate] = diff[non_degenerate] / log_diff[non_degenerate]
# For degenerate or near-degenerate eigenvalues: k(p_i, p_j) → (p_i + p_j)/2 ≈ p
degenerate = np.abs(diff) <= 1e-14
k[degenerate] = 0.5 * (p_i + p_j)[degenerate]
return k, p, U
[docs]
def rho_from_theta(self, theta: np.ndarray) -> np.ndarray:
"""
Compute ρ(θ) = exp(K(θ) - ψ(θ)) where K(θ) = ∑ θ_a F_a.
"""
# Numerical stability:
# ρ = expm(K) / Tr(expm(K)) is invariant under K -> K - cI.
# Shifting by the max eigenvalue avoids overflow/underflow in expm(K)
# for large ||theta|| (common near pure-state limits).
K = np.zeros((self.D, self.D), dtype=complex)
for theta_a, F_a in zip(theta, self.operators):
K += theta_a * F_a
# K is Hermitian for real theta and Hermitian generators.
lambda_max = float(np.max(np.linalg.eigvalsh(K).real))
K_shift = K - lambda_max * np.eye(self.D, dtype=complex)
rho_unnorm = expm(K_shift)
Z = np.trace(rho_unnorm)
rho = rho_unnorm / Z
return rho
[docs]
def psi(self, theta: np.ndarray) -> float:
"""
Compute the cumulant generating function ψ(θ) = log Tr(exp(∑ θ_a F_a)).
This is the log partition function for the exponential family.
"""
K = sum(theta_a * F_a for theta_a, F_a in zip(theta, self.operators))
lambda_max = float(np.max(np.linalg.eigvalsh(K).real))
K_shift = K - lambda_max * np.eye(self.D, dtype=complex)
Z_shift = np.trace(expm(K_shift))
return float(lambda_max + np.log(Z_shift).real)
# Backward compatibility alias
[docs]
def log_partition(self, theta: np.ndarray) -> float:
"""
Deprecated: Use psi() instead.
Compute the cumulant generating function ψ(θ) = log Tr(exp(∑ θ_a F_a)).
"""
import warnings
warnings.warn("log_partition() is deprecated, use psi() instead",
DeprecationWarning, stacklevel=2)
return self.psi(theta)
[docs]
def rho_derivative(self, theta: np.ndarray, a: int, **kwargs) -> np.ndarray:
"""
Compute ∂ρ/∂θ_a using the correct quantum formula.
Three main methods available.
1. 'sld': Symmetric Logarithmic Derivative (fast, ~5% error)
∂ρ/∂θ = (1/2)[ρ(F - ⟨F⟩I) + (F - ⟨F⟩I)ρ]
2. 'duhamel': Duhamel exponential formula with numerical quadrature
(slower, ~5e-6 error for n_points≈200)
∂ρ/∂θ = ∫₀¹ exp(sH)(F - ⟨F⟩I)exp((1-s)H) ds
3. 'duhamel_spectral' / 'duhamel_bch': Duhamel formula via spectral/BCH
representation of H (no explicit s-quadrature, high precision).
4. 'duhamel_block': Higham block-matrix identity for the Fréchet derivative
(one 2D×2D matrix exponential; no eigendecomposition, no quadrature).
⚠️ QUANTUM ALERT: Simple ρ(F - ⟨F⟩I) is WRONG for non-commuting operators!
Parameters
----------
theta : ndarray, shape (n_params,)
Natural parameters
a : int
Parameter index
method : str, default='sld'
'sld' for fast SLD, 'duhamel' for high precision
n_points : int, default=100
Quadrature points for Duhamel (ignored for 'sld')
Returns
-------
drho : ndarray, shape (D, D)
Derivative ∂ρ/∂θ_a (Hermitian matrix)
Notes
-----
Quantum derivative principles applied:
✅ Check operator commutation: Uses symmetric/integral form
✅ Verify operator ordering: Preserves ordering in exponentials
✅ Distinguish quantum vs classical: Uses quantum formulas
✅ Respect Hilbert space structure: Preserves Hermiticity
"""
method = kwargs.get('method', 'sld')
n_points = kwargs.get('n_points', 200) # n=200 gives ~3.6e-06 error; n=100→1.5e-05
rho = self.rho_from_theta(theta)
F_a = self.operators[a]
mean_Fa = np.trace(rho @ F_a).real
I = np.eye(self.D, dtype=complex)
F_centered = F_a - mean_Fa * I
if method == 'sld':
# Symmetric logarithmic derivative (fast approximation)
drho = 0.5 * (rho @ F_centered + F_centered @ rho)
elif method == 'duhamel':
# High-precision Duhamel formula via numerical quadrature
from qig.duhamel import duhamel_derivative, compute_H_from_theta
H, K, psi = compute_H_from_theta(self.operators, theta)
drho = duhamel_derivative(rho, H, F_centered, n_points)
elif method == 'duhamel_block':
# Higham block-matrix identity (Fréchet derivative via a single expm on 2D×2D)
from qig.duhamel import duhamel_derivative_block, compute_H_from_theta
H, K, psi = compute_H_from_theta(self.operators, theta)
drho = duhamel_derivative_block(rho, H, F_centered)
elif method in ('duhamel_spectral', 'duhamel_bch'):
# Duhamel via spectral/BCH representation of H (no explicit s-quadrature)
from qig.duhamel import duhamel_derivative_spectral, compute_H_from_theta
H, K, psi = compute_H_from_theta(self.operators, theta)
drho = duhamel_derivative_spectral(rho, H, F_centered)
else:
raise ValueError(
f"Unknown method: {method}. "
"Use 'sld', 'duhamel', 'duhamel_block', or 'duhamel_spectral'/'duhamel_bch'"
)
return drho
[docs]
def rho_second_derivative(self, theta: np.ndarray, a: int, b: int,
method: str = 'numerical_duhamel',
n_points: int = 100, eps: float = 1e-7) -> np.ndarray:
"""
Compute ∂²ρ/∂θ_a∂θ_b using high-precision numerical differentiation.
Strategy: Use finite differences of the high-precision Duhamel ∂ρ/∂θ.
∂²ρ/∂θ_a∂θ_b ≈ [∂ρ/∂θ_a(θ+ε·e_b) - ∂ρ/∂θ_a(θ-ε·e_b)] / (2ε)
This gives 0.55-2.6% error, which is 30-100× better than the
analytic SLD-based formula.
Parameters
----------
theta : ndarray, shape (n_params,)
Natural parameters
a : int
First parameter index
b : int
Second parameter index
method : str, default='numerical_duhamel'
Currently only 'numerical_duhamel' is supported
n_points : int, default=100
Quadrature points for Duhamel integration
eps : float, default=1e-7
Finite difference step size
Returns
-------
d2rho : ndarray, shape (D, D)
Second derivative ∂²ρ/∂θ_a∂θ_b (Hermitian matrix)
Notes
-----
- Uses central differences for better accuracy
- Hermiticity preserved to machine precision
- More accurate than analytic SLD-based second derivative
Quantum derivative principles applied:
✅ Respects non-commutativity through Duhamel integration
✅ Preserves Hermiticity
✅ Uses high-precision first derivatives
"""
if method != 'numerical_duhamel':
raise ValueError(f"Only 'numerical_duhamel' supported, got {method}")
# Compute ∂ρ/∂θ_a at θ + ε·e_b
theta_plus = theta.copy()
theta_plus[b] += eps
drho_a_plus = self.rho_derivative(theta_plus, a, method='duhamel', n_points=n_points)
# Compute ∂ρ/∂θ_a at θ - ε·e_b
theta_minus = theta.copy()
theta_minus[b] -= eps
drho_a_minus = self.rho_derivative(theta_minus, a, method='duhamel', n_points=n_points)
# Central difference
d2rho = (drho_a_plus - drho_a_minus) / (2 * eps)
return d2rho
def _fisher_block(
self, rho: np.ndarray, operators: List[np.ndarray]
) -> np.ndarray:
"""
Compute Fisher information block for a single subsystem.
This is the BKM metric computed from a single marginal density matrix
and its associated operators.
Parameters
----------
rho : ndarray, shape (d², d²)
Density matrix for a single pair
operators : List[ndarray]
List of d⁴-1 operators, each of shape (d², d²)
Returns
-------
G : ndarray, shape (d⁴-1, d⁴-1)
Fisher information block
"""
from scipy.linalg import eigh
D = rho.shape[0]
n = len(operators)
# Eigendecomposition
eigvals, U = eigh(rho)
p = np.clip(np.real(eigvals), 1e-14, None)
# Centre operators and transform to eigenbasis
A_tilde = np.zeros((n, D, D), dtype=complex)
I = np.eye(D, dtype=complex)
for a, F_a in enumerate(operators):
mean_Fa = np.trace(rho @ F_a).real
A_a = F_a - mean_Fa * I
A_tilde[a] = U.conj().T @ A_a @ U
# BKM kernel
p_i = p[:, None]
p_j = p[None, :]
diff = p_i - p_j
log_diff = np.log(p_i) - np.log(p_j)
k = np.zeros_like(diff)
non_degenerate = np.abs(diff) > 1e-14
k[non_degenerate] = diff[non_degenerate] / log_diff[non_degenerate]
degenerate = np.abs(diff) <= 1e-14
k[degenerate] = 0.5 * (p_i + p_j)[degenerate]
# Assemble metric
G = np.zeros((n, n))
for a in range(n):
A_a = A_tilde[a]
for b in range(a, n):
A_b = A_tilde[b]
prod = A_a * np.conj(A_b)
Gab = float(np.real(np.sum(k * prod)))
G[a, b] = Gab
G[b, a] = Gab
return 0.5 * (G + G.T)
def _is_product_state(self, rho: np.ndarray, tol: float = 1e-6) -> bool:
"""
Check if ρ is approximately a product state ρ₁ ⊗ ... ⊗ ρₙ.
Uses the criterion: ρ is a product iff ρ = ⊗ₖ Trₖ̄(ρ)
where Trₖ̄ traces out all pairs except k.
Parameters
----------
rho : ndarray
Density matrix
tol : float
Tolerance for comparison
Returns
-------
bool
True if ρ is approximately a product state
"""
if not self.pair_basis or self.n_pairs == 1:
return True # Single pair is trivially "product"
# Compute product of marginals
D_pair = self.d ** 2
rho_product = np.array([[1.0]])
for k in range(self.n_pairs):
rho_k = self._partial_trace_to_pair(rho, k)
rho_product = np.kron(rho_product, rho_k)
return np.allclose(rho, rho_product, atol=tol)
[docs]
def marginal_entropy_constraint_theta_only(
self, theta: np.ndarray
) -> Tuple[float, np.ndarray]:
"""
Compute constraint C(θ) = ∑ᵢ hᵢ and gradient ∇C using θ-only formulas.
This is a fast, exact method that avoids materializing ∂ρ/∂θ for each parameter.
Instead, it uses the BKM inner product:
∂C/∂θ_a = ⟨F̃_a, B⟩_BKM
where:
F̃_a = F_a - ⟨F_a⟩I (centered operator)
B = ∑ᵢ Bᵢ (lifted test operator)
Bᵢ = (log ρᵢ + Iᵢ) ⊗ I_rest
This reuses the eigendecomposition and BKM kernel from fisher_information(),
achieving machine precision (~10⁻¹⁴) with ~100× speedup over Duhamel method.
Parameters
----------
theta : ndarray
Natural parameters
Returns
-------
C : float
Constraint value ∑ᵢ hᵢ
grad_C : ndarray, shape (n_params,)
Gradient ∇C
"""
rho = self.rho_from_theta(theta)
h = marginal_entropies(rho, self.dims)
C = float(np.sum(h))
# Get eigendecomposition and BKM kernel
k, p, U = self._bkm_kernel(rho)
# Build lifted test operator B = ∑ᵢ (log ρᵢ + Iᵢ) ⊗ I_rest
B_full = np.zeros((self.D, self.D), dtype=complex)
for i in range(self.n_sites):
# Compute marginal ρᵢ
rho_i = partial_trace(rho, self.dims, keep=i)
# Compute log(ρᵢ) safely using eigendecomposition
eigvals_i, eigvecs_i = eigh(rho_i)
eigvals_i = np.maximum(eigvals_i.real, 1e-14)
log_eigvals_i = np.log(eigvals_i)
log_rho_i = eigvecs_i @ np.diag(log_eigvals_i) @ eigvecs_i.conj().T
# Lifted operator: (log ρᵢ + Iᵢ) ⊗ I_rest
B_i = log_rho_i + np.eye(self.dims[i], dtype=complex)
B_full += self._lift_to_full_space(B_i, i)
# Transform B to eigenbasis of ρ
B_tilde = U.conj().T @ B_full @ U
# Compute gradient via BKM inner products
grad_C = np.zeros(self.n_params)
I_full = np.eye(self.D, dtype=complex)
for a, F_a in enumerate(self.operators):
# Center operator: F̃_a = F_a - ⟨F_a⟩I
mean_Fa = np.trace(rho @ F_a).real
F_tilde = F_a - mean_Fa * I_full
# Transform to eigenbasis
F_tilde_eigen = U.conj().T @ F_tilde @ U
# BKM inner product: ⟨F̃_a, B⟩_BKM = ∑ᵢⱼ k[i,j] F̃_a[i,j] conj(B[i,j])
grad_C[a] = -np.real(np.sum(k * (F_tilde_eigen * np.conj(B_tilde))))
return C, grad_C
[docs]
def marginal_entropy_constraint(
self, theta: np.ndarray, method: str = 'theta_only'
) -> Tuple[float, np.ndarray]:
"""
Compute constraint value C(θ) = ∑_i h_i and gradient ∇C.
This method dispatches to different implementations based on the method parameter.
Parameters
----------
theta : ndarray
Natural parameters
method : str, default='theta_only'
Method for gradient computation. Options:
- ``'theta_only'``: Fast θ-only method using BKM inner products (default).
~100× faster, machine precision accuracy.
- ``'duhamel'``: Legacy method materializing ∂ρ/∂θ via Duhamel.
Slow but kept for verification.
- ``'sld'``: Legacy method using SLD approximation.
Faster than Duhamel but ~5% error.
Returns
-------
C : float
Constraint value ∑ᵢ hᵢ
grad_C : ndarray
Gradient ∇C
"""
if method == 'theta_only':
return self.marginal_entropy_constraint_theta_only(theta)
# Legacy method: materialize ∂ρ/∂θ for each parameter
rho = self.rho_from_theta(theta)
h = marginal_entropies(rho, self.dims)
C = float(np.sum(h))
# Compute gradient by materializing drho
grad_C = np.zeros(self.n_params)
I = np.eye(self.D, dtype=complex)
for a in range(self.n_params):
F_a = self.operators[a]
# Compute ∂ρ/∂θ_a using specified method (duhamel or sld)
drho_dtheta_a = self.rho_derivative(theta, a, method=method)
# Sum over all subsystems
for i in range(self.n_sites):
# Compute marginal ρ_i
rho_i = partial_trace(rho, self.dims, keep=i)
# Compute ∂ρ_i/∂θ_a (partial trace of ∂ρ/∂θ_a)
drho_i_dtheta_a = partial_trace(drho_dtheta_a, self.dims, keep=i)
# Compute log(ρ_i) with regularisation
eigvals_i, eigvecs_i = eigh(rho_i)
eigvals_i = np.maximum(eigvals_i.real, 1e-14)
log_eigvals_i = np.log(eigvals_i)
log_rho_i = eigvecs_i @ np.diag(log_eigvals_i) @ eigvecs_i.conj().T
# Compute ∂h_i/∂θ_a = -Tr((∂ρ_i/∂θ_a) log(ρ_i))
dh_i_dtheta_a = -np.trace(drho_i_dtheta_a @ log_rho_i).real
grad_C[a] += dh_i_dtheta_a
return C, grad_C
[docs]
def third_cumulant_contraction(self, theta: np.ndarray, method: str = 'fd') -> np.ndarray:
"""
Compute (∇G)[θ], the third cumulant tensor contracted with θ.
This is the matrix with (i,j) entry: ∑_k (∂G_ik/∂θ_j) θ_k
Following the paper's Appendix (eq. 824-826), this appears in the Jacobian as:
M = -G - (∇G)[θ] + ...
The third cumulant ∇G = ∇³ψ is totally symmetric in all three indices.
We compute ∂G_ab/∂θ_c by differentiating the spectral BKM formula using
perturbation theory for eigenvalues and eigenvectors.
Parameters
----------
theta : ndarray, shape (n_params,)
Natural parameters
method : str, optional
Method for computing the third cumulant:
- 'fd' (default): Finite differences of Fisher metric (fast, accurate)
- 'analytic': Analytic perturbation theory (slow, approximate)
Returns
-------
contraction : ndarray, shape (n_params, n_params)
Matrix (∇G)[θ] with (i,j) entry = ∑_k (∂G_ik/∂θ_j) θ_k
Notes
-----
Quantum derivative principles applied:
✅ Check operator commutation: F_a, F_b, F_c may not commute
✅ Verify operator ordering: Careful in spectral differentiation
✅ Distinguish quantum vs classical: Uses quantum covariance derivatives
✅ Respect Hilbert space structure: Works on full Hilbert space
✅ Question each derivative step: Uses perturbation theory
The finite difference method is much faster (~100-500×) and avoids
expensive ∂ρ/∂θ computations.
"""
if method == 'fd':
return self._third_cumulant_contraction_fd(theta)
elif method == 'analytic':
return self._third_cumulant_contraction_analytic(theta)
elif method == 'block':
return self._third_cumulant_contraction_block(theta)
else:
raise ValueError(f"Unknown method '{method}'. Use 'fd', 'analytic', or 'block'.")
[docs]
def psi_hessian_block(
self, theta: np.ndarray, param_indices: Optional[List[int]] = None
) -> np.ndarray:
"""
Compute Hessian of ψ(θ) = log Tr(exp(K(θ))) via block Fréchet derivatives.
This returns the (selected) 2nd cumulant matrix:
∂²ψ/∂θ_a∂θ_b
Notes
-----
This method is intended for validation / small systems. It scales as
O(m²) block matrix exponentials, where m = len(param_indices) (or n_params).
"""
from scipy.linalg import expm
from qig.duhamel import expm_frechet_block_2
if param_indices is None:
param_indices = list(range(self.n_params))
m = len(param_indices)
# Build K(θ) = Σ θ_a F_a
K = np.zeros((self.D, self.D), dtype=complex)
for theta_a, F_a in zip(theta, self.operators):
K += theta_a * F_a
expK = expm(K)
Z = np.trace(expK)
Z_real = float(np.real(Z))
if not np.isfinite(Z_real) or abs(Z) < 1e-300:
raise FloatingPointError("Trace(exp(K)) is not finite; reduce ||theta||.")
# First derivatives of Z: dZ_a = Tr(exp(K) F_a)
dZ = np.zeros(m, dtype=float)
for i, a in enumerate(param_indices):
dZ[i] = float(np.real(np.trace(expK @ self.operators[a])))
# Second derivatives of Z via block trick: d2Z_ab = Tr(D² exp_K[F_a, F_b])
d2Z = np.zeros((m, m), dtype=float)
for i, a in enumerate(param_indices):
E = self.operators[a]
for j, b in enumerate(param_indices[i:], start=i):
F = self.operators[b]
# The 3×3 block construction gives an *ordered* second-derivative piece.
# The true 2nd Fréchet derivative is symmetric in (E, F), so we sum both
# orderings (no 1/2 factor).
d2_ef = np.trace(expm_frechet_block_2(K, E, F))
d2_fe = np.trace(expm_frechet_block_2(K, F, E))
d2_real = float(np.real(d2_ef + d2_fe))
d2Z[i, j] = d2_real
d2Z[j, i] = d2_real
# Hessian of log Z: ψ'' = Z''/Z - (Z'/Z)(Z'/Z)^T
H = d2Z / Z_real - np.outer(dZ, dZ) / (Z_real * Z_real)
return 0.5 * (H + H.T)
def _third_cumulant_contraction_block(self, theta: np.ndarray) -> np.ndarray:
"""
Compute (∇G)[θ] using 3rd derivatives of ψ via block Fréchet derivatives.
This is intended for small systems (e.g. single qubit / qutrit) and
validation. For larger models use method='fd' (default).
"""
from itertools import permutations
from scipy.linalg import expm
from qig.duhamel import expm_frechet_block_2, expm_frechet_block_3
n = self.n_params
if n > 20:
raise ValueError(
"method='block' is intended for small n_params (≤20). "
"Use method='fd' for larger systems."
)
# Build K(θ) and direction ΔK = Σ θ_a F_a = K itself.
K = np.zeros((self.D, self.D), dtype=complex)
for theta_a, F_a in zip(theta, self.operators):
K += theta_a * F_a
K_dir = K
expK = expm(K)
Z = np.trace(expK)
Z_real = float(np.real(Z))
if not np.isfinite(Z_real) or abs(Z) < 1e-300:
raise FloatingPointError("Trace(exp(K)) is not finite; reduce ||theta||.")
# First derivatives dZ_a and dZ_K
dZ = np.array([float(np.real(np.trace(expK @ F_a))) for F_a in self.operators])
dZK = float(np.real(np.trace(expK @ K_dir)))
# Second derivatives needed: d2Z_ab (all pairs) and d2Z_aK (vector)
d2Z = np.zeros((n, n), dtype=float)
for a in range(n):
E = self.operators[a]
for b in range(a, n):
F = self.operators[b]
# Sum both orderings to obtain the symmetric 2nd Fréchet derivative.
d2_ef = np.trace(expm_frechet_block_2(K, E, F))
d2_fe = np.trace(expm_frechet_block_2(K, F, E))
val = float(np.real(d2_ef + d2_fe))
d2Z[a, b] = val
d2Z[b, a] = val
d2Z_aK = np.zeros(n, dtype=float)
for a in range(n):
E = self.operators[a]
d2_eK = np.trace(expm_frechet_block_2(K, E, K_dir))
d2_Ke = np.trace(expm_frechet_block_2(K, K_dir, E))
d2Z_aK[a] = float(np.real(d2_eK + d2_Ke))
# Third derivative contraction: ψ3(F_a, F_b, K_dir)
contraction = np.zeros((n, n), dtype=float)
for a in range(n):
E = self.operators[a]
for b in range(a, n):
F = self.operators[b]
# The 4×4 block construction yields ordered 3rd-derivative pieces.
# The true 3rd Fréchet derivative is symmetric multilinear, so we sum
# over all 3! orderings.
dirs = (E, F, K_dir)
d3_sum = 0.0 + 0.0j
for perm in permutations(dirs, 3):
d3_sum += np.trace(expm_frechet_block_3(K, perm[0], perm[1], perm[2]))
d3Z = float(np.real(d3_sum))
# ψ3 = Z'''/Z - (Z'' Z' + perms)/Z^2 + 2 Z'Z'Z'/Z^3
term1 = d3Z / Z_real
term2 = (
d2Z[a, b] * dZK + d2Z_aK[a] * dZ[b] + d2Z_aK[b] * dZ[a]
) / (Z_real * Z_real)
term3 = 2.0 * dZ[a] * dZ[b] * dZK / (Z_real * Z_real * Z_real)
val = term1 - term2 + term3
contraction[a, b] = val
contraction[b, a] = val
return 0.5 * (contraction + contraction.T)
def _third_cumulant_contraction_fd(self, theta: np.ndarray, eps: float = 1e-5) -> np.ndarray:
"""
Compute (∇G)[θ] using finite differences of the Fisher metric.
Fast method: ∂G_ab/∂θ_c ≈ [G_ab(θ + ε·e_c) - G_ab(θ - ε·e_c)] / (2ε)
Then contract: (∇G)[θ]_ab = Σ_c (∂G_ab/∂θ_c) θ_c
Expected speedup: 100-500× over analytic method.
Expected accuracy: ~10⁻⁸.
"""
n = self.n_params
# Compute ∂G_ab/∂θ_c for all c by finite differences
dG_dtheta = np.zeros((n, n, n)) # [a, b, c]
theta_perturbed = theta.copy()
for c in range(n):
# Forward perturbation
theta_perturbed[c] = theta[c] + eps
G_plus = self.fisher_information(theta_perturbed)
# Backward perturbation
theta_perturbed[c] = theta[c] - eps
G_minus = self.fisher_information(theta_perturbed)
# Central difference
dG_dtheta[:, :, c] = (G_plus - G_minus) / (2 * eps)
# Reset
theta_perturbed[c] = theta[c]
# Contract with θ: (∇G)[θ]_ab = Σ_c (∂G_ab/∂θ_c) θ_c
contraction = np.einsum('abc,c->ab', dG_dtheta, theta)
return contraction
def _third_cumulant_contraction_analytic(self, theta: np.ndarray) -> np.ndarray:
"""
Compute (∇G)[θ] using analytic perturbation theory.
This is the original implementation - slow but exact (modulo approximations).
Kept for reference and validation.
"""
rho = self.rho_from_theta(theta)
# Eigendecomposition of ρ
eigvals, eigvecs = eigh(rho)
eigvals = np.maximum(eigvals.real, 1e-14) # Regularise
# Compute BKM kernel k(p_i, p_j)
def bkm_kernel(p_i, p_j, eps=1e-14):
"""BKM kernel: (p - q)/(log p - log q) if p ≠ q, else p."""
if np.abs(p_i - p_j) < eps:
return p_i
else:
return (p_i - p_j) / (np.log(p_i) - np.log(p_j))
# Compute centered operators in eigenbasis
def centered_operator_eigenbasis(F_a):
"""Compute F̃_a = F_a - ⟨F_a⟩I in eigenbasis of ρ."""
mean_Fa = np.trace(rho @ F_a).real
F_a_centered = F_a - mean_Fa * np.eye(self.D, dtype=complex)
# Transform to eigenbasis
return eigvecs.conj().T @ F_a_centered @ eigvecs
# Compute ∂ρ/∂θ for all parameters
I = np.eye(self.D, dtype=complex)
drho_dtheta = []
for a in range(self.n_params):
drho_dtheta.append(self.rho_derivative(theta, a))
# Compute eigenvalue derivatives using Hellmann-Feynman theorem
# ∂λ_i/∂θ_a = ⟨i|∂ρ/∂θ_a|i⟩
d_eigvals_dtheta = np.zeros((self.D, self.n_params))
for a in range(self.n_params):
drho_a_eigenbasis = eigvecs.conj().T @ drho_dtheta[a] @ eigvecs
d_eigvals_dtheta[:, a] = np.diag(drho_a_eigenbasis).real
# Compute eigenvector derivatives using first-order perturbation theory
# ∂|i⟩/∂θ_a = ∑_{j≠i} (⟨j|∂ρ/∂θ_a|i⟩)/(λ_i - λ_j) |j⟩
# This is stored as a matrix: d_eigvecs_dtheta[a] @ eigvecs gives the derivative
d_eigvecs_dtheta = []
for a in range(self.n_params):
drho_a_eigenbasis = eigvecs.conj().T @ drho_dtheta[a] @ eigvecs
d_U = np.zeros((self.D, self.D), dtype=complex)
for i in range(self.D):
for j in range(self.D):
if i != j:
denom = eigvals[i] - eigvals[j]
if np.abs(denom) > 1e-10:
# ∂U_ji/∂θ_a (column i of U is eigenvector i)
d_U[j, i] = drho_a_eigenbasis[j, i] / denom
d_eigvecs_dtheta.append(d_U)
# Now compute ∂G_ab/∂θ_c for all a,b,c
# Then contract with θ to get (∇G)[θ]
contraction = np.zeros((self.n_params, self.n_params))
for a in range(self.n_params):
for b in range(self.n_params):
# Compute ∂G_ab/∂θ_c for all c
dG_ab_dtheta = np.zeros(self.n_params)
# Get centered operators in eigenbasis
A_a = centered_operator_eigenbasis(self.operators[a])
A_b = centered_operator_eigenbasis(self.operators[b])
for c in range(self.n_params):
# Differentiate the spectral BKM formula
# G_ab = ∑_{i,j} k(p_i, p_j) * A_a[i,j] * conj(A_b[i,j])
# Three terms from product rule:
# 1. ∂k/∂θ_c * A_a * conj(A_b)
# 2. k * ∂A_a/∂θ_c * conj(A_b)
# 3. k * A_a * conj(∂A_b/∂θ_c)
dG_ab_c = 0.0
for i in range(self.D):
for j in range(self.D):
k_ij = bkm_kernel(eigvals[i], eigvals[j])
# Term 1: ∂k/∂θ_c
# ∂k/∂p_i and ∂k/∂p_j, then chain rule with ∂p_i/∂θ_c
if np.abs(eigvals[i] - eigvals[j]) < 1e-14:
# k = p_i, so ∂k/∂p_i = 1, ∂k/∂p_j = 0
dk_dtheta_c = d_eigvals_dtheta[i, c]
else:
log_diff = np.log(eigvals[i]) - np.log(eigvals[j])
p_diff = eigvals[i] - eigvals[j]
# ∂k/∂p_i = (log_diff - p_diff/p_i) / log_diff²
# ∂k/∂p_j = -(log_diff - p_diff/p_j) / log_diff²
dk_dp_i = (log_diff - p_diff/eigvals[i]) / (log_diff**2)
dk_dp_j = -(log_diff - p_diff/eigvals[j]) / (log_diff**2)
dk_dtheta_c = dk_dp_i * d_eigvals_dtheta[i, c] + dk_dp_j * d_eigvals_dtheta[j, c]
term1 = dk_dtheta_c * A_a[i,j] * np.conj(A_b[i,j])
# Terms 2 & 3: ∂A_a/∂θ_c and ∂A_b/∂θ_c
# A_a = U† F̃_a U, so ∂A_a/∂θ = (∂U†/∂θ) F̃_a U + U† (∂F̃_a/∂θ) U + U† F̃_a (∂U/∂θ)
# This is complex, so for now use the fact that the derivative involves
# the eigenvector derivatives we computed
# Simplified: Assume the main contribution comes from eigenvalue changes
# (This is an approximation - full implementation would include eigenvector derivatives)
# For now, just use term 1
dG_ab_c += term1.real
dG_ab_dtheta[c] = dG_ab_c
# Contract with θ: ∑_c (∂G_ab/∂θ_c) θ_c
contraction[a, b] = np.dot(dG_ab_dtheta, theta)
return contraction
[docs]
def constraint_hessian_fd_theta_only(self, theta: np.ndarray, eps: float = 1e-5) -> np.ndarray:
"""
Compute constraint Hessian ∇²C using finite differences of θ-only gradient.
This is a fast, accurate method that computes:
∂²C/∂θ_a∂θ_b ≈ [∇C(θ + eps·e_b) - ∇C(θ - eps·e_b)]_a / (2·eps)
By differentiating the exact θ-only gradient, this achieves better accuracy
than the current approach which uses exact formulas with approximate second
derivatives (FD of Duhamel drho).
Expected speedup: 50-100× over current Duhamel-based method.
Expected accuracy: ~10⁻⁸ (better than current ~10⁻⁶).
Parameters
----------
theta : ndarray
Natural parameters
eps : float, optional
Finite difference step size (default: 1e-5)
Optimal for central differences: h ≈ (machine_eps)^(1/3) ≈ 1e-5
Returns
-------
hess : ndarray, shape (n_params, n_params)
Hessian matrix ∇²C, symmetric real matrix
Notes
-----
**Why this is better than current approach:**
Current (two approximations): Error ≈ 10⁻⁶
- ``∂²C/∂θ_a∂θ_b = f(∂²ρ/∂θ_a∂θ_b) = f(FD(∂ρ/∂θ)) = f(FD(Duhamel(ρ)))``
New (one approximation): Error ≈ 10⁻⁸
- ``∂²C/∂θ_a∂θ_b ≈ FD(∂C/∂θ_a) = FD(exact_BKM_formula(ρ))``
Key insight: Differentiating an exact gradient is more accurate than
using an exact formula with approximate second derivatives.
"""
n = self.n_params
hess = np.zeros((n, n))
# Compute Hessian by finite differences of θ-only gradient
for b in range(n):
# Perturbation in direction b
e_b = np.zeros(n)
e_b[b] = eps
# Compute gradients at θ ± eps·e_b using exact θ-only formula
_, grad_plus = self.marginal_entropy_constraint_theta_only(theta + e_b)
_, grad_minus = self.marginal_entropy_constraint_theta_only(theta - e_b)
# Central difference for column b
hess[:, b] = (grad_plus - grad_minus) / (2 * eps)
# Symmetrize to ensure exact symmetry (should already be symmetric to roundoff)
hess = 0.5 * (hess + hess.T)
return hess
[docs]
def constraint_hessian(self, theta: np.ndarray, method: str = 'fd_theta_only',
n_points: int = 100, eps: float = 1e-5) -> np.ndarray:
"""
Compute ∇²C, the Hessian of the constraint C(θ) = ∑ᵢ hᵢ(θ).
This method dispatches to different implementations based on the method parameter.
Parameters
----------
theta : ndarray, shape (n_params,)
Natural parameters
method : str, default='fd_theta_only'
Method for Hessian computation. Options:
- ``'fd_theta_only'``: FD of θ-only gradient (default).
~50-100× faster, ~10⁻⁸ error, recommended.
- ``'duhamel'``: FD of Duhamel drho (legacy, slow, ~10⁻⁶ error).
- ``'sld'``: Analytic formula using SLD (legacy, fast but ~10% error).
n_points : int, default=100
Quadrature points for Duhamel (ignored for other methods)
eps : float, default=1e-5
Finite difference step size
Returns
-------
hessian : ndarray, shape (n_params, n_params)
Constraint Hessian ∇²C (symmetric matrix)
Notes
-----
The default 'fd_theta_only' method uses finite differences of the exact
θ-only gradient, achieving better accuracy and much better performance
than the legacy methods which use exact formulas with approximate inputs.
"""
# Dispatch to appropriate implementation
if method == 'fd_theta_only':
return self.constraint_hessian_fd_theta_only(theta, eps=eps)
# Legacy methods: materialize ∂²ρ/∂θ_a∂θ_b for each (a,b)
from qig.core import partial_trace
rho = self.rho_from_theta(theta)
I_full = np.eye(self.D, dtype=complex)
# Compute ∂ρ/∂θ for all parameters
# Use the same method as for ∂²ρ for consistency
drho_method = 'duhamel' if method == 'duhamel' else 'sld'
drho_dtheta = []
for a in range(self.n_params):
drho_dtheta.append(self.rho_derivative(theta, a, method=drho_method, n_points=n_points))
# Initialise Hessian
hessian = np.zeros((self.n_params, self.n_params))
# Sum over all marginals
for i in range(self.n_sites):
# Compute marginal ρᵢ
rho_i = partial_trace(rho, self.dims, keep=i)
d_i = rho_i.shape[0]
I_i = np.eye(d_i, dtype=complex)
# Eigendecomposition of ρᵢ
eigvals_i, eigvecs_i = eigh(rho_i)
eigvals_i = np.maximum(eigvals_i.real, 1e-14)
# Compute log(ρᵢ)
log_eigvals_i = np.log(eigvals_i)
log_rho_i = eigvecs_i @ np.diag(log_eigvals_i) @ eigvecs_i.conj().T
# Compute ∂ρᵢ/∂θ for all parameters
drho_i_dtheta = []
for a in range(self.n_params):
drho_i_dtheta.append(partial_trace(drho_dtheta[a], self.dims, keep=i))
# Compute ∂²hᵢ/∂θ_a∂θ_b for all a, b
for a in range(self.n_params):
for b in range(a, self.n_params): # Only upper triangle (symmetric)
# Term 1: -Tr(∂²ρᵢ/∂θ_a∂θ_b (I + log ρᵢ))
# We need ∂²ρᵢ/∂θ_a∂θ_b
if method == 'duhamel':
# High-precision: numerical differentiation of Duhamel
# Gives ~0.5-2.6% error (30-100× better than SLD)
d2rho_dtheta_ab = self.rho_second_derivative(
theta, a, b, method='numerical_duhamel',
n_points=n_points, eps=eps
)
else: # method == 'sld'
# Fast analytic: SLD-based formula
# Gives ~8-12% error but much faster
# From ∂ρ/∂θ_a = (1/2)[ρ(F_a - ⟨F_a⟩I) + (F_a - ⟨F_a⟩I)ρ]:
# ∂²ρ/∂θ_a∂θ_b = (1/2)[∂ρ/∂θ_b (F_a - ⟨F_a⟩I) + (F_a - ⟨F_a⟩I) ∂ρ/∂θ_b]
# - ρ Cov_sym(F_b, F_a)
F_a = self.operators[a]
F_b = self.operators[b]
mean_Fa = np.trace(rho @ F_a).real
mean_Fb = np.trace(rho @ F_b).real
F_a_centered = F_a - mean_Fa * I_full
# Symmetrized covariance
cov_sym = 0.5 * (np.trace(rho @ F_b @ F_a).real +
np.trace(rho @ F_a @ F_b).real) - mean_Fb * mean_Fa
# ∂²ρ/∂θ_a∂θ_b (SLD-based)
d2rho_dtheta_ab = (0.5 * (drho_dtheta[b] @ F_a_centered
+ F_a_centered @ drho_dtheta[b])
- cov_sym * rho)
# Partial trace to get ∂²ρᵢ/∂θ_a∂θ_b
d2rho_i_dtheta_ab = partial_trace(d2rho_dtheta_ab, self.dims, keep=i)
# Term 1
term1 = -np.trace(d2rho_i_dtheta_ab @ (I_i + log_rho_i)).real
# Term 2: -Tr(∂ρᵢ/∂θ_a ∂(log ρᵢ)/∂θ_b)
# Compute ∂(log ρᵢ)/∂θ_b using Daleckii-Krein formula
# Transform ∂ρᵢ/∂θ_b to eigenbasis of ρᵢ
drho_i_b_eigenbasis = eigvecs_i.conj().T @ drho_i_dtheta[b] @ eigvecs_i
# Apply Daleckii-Krein formula element-wise
dlog_rho_i_eigenbasis = np.zeros_like(drho_i_b_eigenbasis)
for ii in range(d_i):
for jj in range(d_i):
if ii == jj:
# Diagonal: ∂(log ρᵢ)/∂θ_b = (∂ρᵢ/∂θ_b) / ρᵢ
dlog_rho_i_eigenbasis[ii, jj] = (
drho_i_b_eigenbasis[ii, jj] / eigvals_i[ii]
)
else:
# Off-diagonal: Daleckii-Krein formula
log_diff = log_eigvals_i[ii] - log_eigvals_i[jj]
p_diff = eigvals_i[ii] - eigvals_i[jj]
if np.abs(p_diff) > 1e-10:
dlog_rho_i_eigenbasis[ii, jj] = (
drho_i_b_eigenbasis[ii, jj] * log_diff / p_diff
)
else:
# Limit as p_i → p_j: (log p_i - log p_j)/(p_i - p_j) → 1/p_i
dlog_rho_i_eigenbasis[ii, jj] = (
drho_i_b_eigenbasis[ii, jj] / eigvals_i[ii]
)
# Transform back to original basis
dlog_rho_i_dtheta_b = (eigvecs_i @ dlog_rho_i_eigenbasis
[docs]
@ eigvecs_i.conj().T)
# Term 2
term2 = -np.trace(drho_i_dtheta[a] @ dlog_rho_i_dtheta_b).real
# ∂²hᵢ/∂θ_a∂θ_b
d2h_i = term1 + term2
# Add to Hessian (symmetric, so fill both (a,b) and (b,a))
hessian[a, b] += d2h_i
if a != b:
hessian[b, a] += d2h_i
return hessian
def lagrange_multiplier_gradient(self, theta: np.ndarray,
method: str = 'sld',
n_points: int = 100) -> np.ndarray:
"""
Compute ∇ν, the gradient of the Lagrange multiplier ν(θ).
From the paper (equations 831-832), the gradient components are:
``∂ν/∂θ_j = (1/||a||²) [a^T G e_j + a^T (∇G)[θ] e_j + (∇a)_j^T G θ - 2ν a^T (∇a)_j]``
where:
- ``ν = (a^T G θ)/(a^T a)`` is the Lagrange multiplier
- ``a = ∇C = ∇(∑h_i)`` is the constraint gradient
- ``G`` = Fisher information (BKM metric)
- ``(∇G)[θ]`` = third cumulant tensor contracted with θ
- ``∇a = ∇²C`` is the constraint Hessian
Parameters
----------
theta : ndarray, shape (n_params,)
Natural parameters
method : str, default='sld'
'sld' or 'duhamel' for derivative precision
n_points : int, default=100
Quadrature points for Duhamel (ignored for 'sld')
Returns
-------
grad_nu : ndarray, shape (n_params,)
Gradient ∇ν
Notes
-----
Uses all the high-precision components from Steps 1-3:
- BKM metric G (spectral formula)
- Third cumulant (∇G)[θ] (perturbation theory)
- Constraint Hessian ∇²C (Duhamel for high precision)
"""
# Get constraint gradient a = ∇C and constraint value (use optimized default)
C, a = self.marginal_entropy_constraint(theta) # Uses theta_only by default
# Get BKM metric G
G = self.fisher_information(theta)
# Compute Lagrange multiplier ν = (a^T G θ)/(||a||²)
a_norm_sq = np.dot(a, a)
nu = np.dot(a, G @ theta) / a_norm_sq
# Get third cumulant contraction (∇G)[θ] (use optimized default)
third_cumulant_contracted = self.third_cumulant_contraction(theta) # Uses fd by default
# Get constraint Hessian ∇²C = ∇a (use optimized default)
hessian_C = self.constraint_hessian(theta) # Uses fd_theta_only by default
# Compute gradient ∇ν for each parameter j
grad_nu = np.zeros(self.n_params)
for j in range(self.n_params):
# e_j is the j-th standard basis vector (handled implicitly)
# Term 1: a^T G e_j = (G^T a)_j = (Ga)_j (since G is symmetric)
term1 = (G @ a)[j]
# Term 2: a^T (∇G)[θ] e_j = [a^T (∇G)[θ]]_j
# This is the j-th component of a^T times the third cumulant contraction
term2 = np.dot(a, third_cumulant_contracted[:, j])
# Term 3: (∇a)_j^T G θ
# (∇a)_j is the j-th column of the Hessian
grad_a_j = hessian_C[:, j]
term3 = np.dot(grad_a_j, G @ theta)
# Term 4: -2ν a^T (∇a)_j
term4 = -2 * nu * np.dot(a, grad_a_j)
# Combine all terms
grad_nu[j] = (term1 + term2 + term3 + term4) / a_norm_sq
return grad_nu
[docs]
def jacobian(self, theta: np.ndarray,
method: str = 'duhamel',
n_points: int = 100) -> np.ndarray:
"""
Compute the Jacobian M = ∂F/∂θ of the constrained dynamics.
From the paper (equations 824-827):
F(θ) = -G(θ)θ + ν(θ)a(θ)
M = ∂F/∂θ = -G - (∇G)[θ] + ν∇²C + a(∇ν)^T
For systems with local operators only (no entanglement):
- Structural identity Gθ = -a holds
- ν = -1 always, ∇ν = 0
- Simplifies to: M = -G - (∇G)[θ] - ∇²C
For systems with entangling operators (pair basis):
- Structural identity BROKEN: Gθ ≠ -a
- ν ≠ -1, ∇ν ≠ 0
- Must use full formula: M = -G - (∇G)[θ] + ν∇²C + a(∇ν)^T
Parameters
----------
theta : ndarray, shape (n_params,)
Natural parameters
method : str, default='duhamel'
'sld' or 'duhamel' for derivative precision
Changed default to 'duhamel' for better accuracy with entangled states
n_points : int, default=100
Quadrature points for Duhamel (ignored for 'sld')
Returns
-------
M : ndarray, shape (n_params, n_params)
Jacobian matrix
Notes
-----
This is the full Jacobian for GENERIC dynamics:
θ̇ = F(θ) = -Gθ + νa
The degeneracy of M determines the geometry of the constraint manifold
and is central to the paper's analysis of the inaccessible game.
"""
# Get all components (use optimized defaults for each)
G = self.fisher_information(theta)
C, a = self.marginal_entropy_constraint(theta) # Uses theta_only by default
third_cumulant = self.third_cumulant_contraction(theta) # Uses fd by default
hessian_C = self.constraint_hessian(theta) # Uses fd_theta_only by default
# Compute Lagrange multiplier
Gtheta = G @ theta
nu = np.dot(a, Gtheta) / np.dot(a, a)
# Compute Lagrange multiplier gradient
grad_nu = self.lagrange_multiplier_gradient(theta, method=method, n_points=n_points)
# Assemble full Jacobian: M = -G - (∇G)[θ] + ν∇²C + a(∇ν)^T
M = -G - third_cumulant + nu * hessian_C + np.outer(a, grad_nu)
return M
[docs]
def symmetric_part(self, theta: np.ndarray,
method: str = 'duhamel',
n_points: int = 100) -> np.ndarray:
"""
Compute symmetric part S of the flow Jacobian M.
The GENERIC decomposition splits M into symmetric and antisymmetric parts:
M = S + A, where S = (M + M^T)/2
The symmetric part S generates the irreversible (dissipative) dynamics.
Parameters
----------
theta : np.ndarray
Natural parameters
method : str
Method for computing Jacobian
n_points : int
Number of points for numerical integration
Returns
-------
S : np.ndarray, shape (n_params, n_params)
Symmetric part of Jacobian, satisfies S = S^T
Notes
-----
The symmetric part satisfies key degeneracy conditions:
- S @ a ≈ 0, where a = ∇C is the constraint gradient
- θ^T S θ ≥ 0 (entropy production non-negative on tangent space)
See Also
--------
antisymmetric_part : Antisymmetric part of Jacobian
verify_degeneracy_conditions : Verify GENERIC structure
"""
M = self.jacobian(theta, method=method, n_points=n_points)
S = 0.5 * (M + M.T)
return S
[docs]
def antisymmetric_part(self, theta: np.ndarray,
method: str = 'duhamel',
n_points: int = 100) -> np.ndarray:
"""
Compute antisymmetric part A of the flow Jacobian M.
The GENERIC decomposition splits M into symmetric and antisymmetric parts:
M = S + A, where A = (M - M^T)/2
The antisymmetric part A generates the reversible (Hamiltonian) dynamics.
Parameters
----------
theta : np.ndarray
Natural parameters
method : str
Method for computing Jacobian
n_points : int
Number of points for numerical integration
Returns
-------
A : np.ndarray, shape (n_params, n_params)
Antisymmetric part of Jacobian, satisfies A = -A^T
Notes
-----
The antisymmetric part satisfies key degeneracy conditions:
- A @ (-G @ theta) ≈ 0, where -G @ theta = ∇H is the entropy gradient
The antisymmetric part encodes the effective Hamiltonian through:
A_ab θ_b = Σ_c f_abc η_c, where η are the Hamiltonian coefficients.
See Also
--------
symmetric_part : Symmetric part of Jacobian
verify_degeneracy_conditions : Verify GENERIC structure
"""
M = self.jacobian(theta, method=method, n_points=n_points)
A = 0.5 * (M - M.T)
return A
[docs]
def verify_degeneracy_conditions(self, theta: np.ndarray,
method: str = 'duhamel',
n_points: int = 100,
tol: float = 1e-6) -> Dict[str, Any]:
"""
Verify degeneracy conditions for GENERIC structure.
The GENERIC structure requires that the symmetric and antisymmetric
parts satisfy specific degeneracy conditions related to the constraint
and entropy gradients.
Parameters
----------
theta : np.ndarray
Natural parameters
method : str
Method for computing Jacobian
n_points : int
Number of points for numerical integration
tol : float
Tolerance for degeneracy conditions
Returns
-------
diagnostics : dict
Dictionary containing:
- 'S': Symmetric part
- 'A': Antisymmetric part
- 'constraint_gradient': a = ∇C
- 'entropy_gradient': ∇H = -G @ theta
- 'S_annihilates_constraint': ||S @ a||
- 'A_annihilates_entropy_gradient': ||A @ (-G @ theta)||
- 'entropy_production': θ^T S θ
- 'S_symmetric_error': ||S - S^T||
- 'A_antisymmetric_error': ||A + A^T||
- 'reconstruction_error': ||M - (S + A)||
- 'all_passed': bool indicating if all checks passed
Notes
-----
Degeneracy conditions (should hold within tolerance):
1. S @ a ≈ 0: Symmetric part annihilates constraint gradient
2. A @ ∇H ≈ 0: Antisymmetric part annihilates entropy gradient
3. θ^T S θ ≥ 0: Entropy production non-negative
4. S = S^T: Symmetry (machine precision)
5. A = -A^T: Antisymmetry (machine precision)
6. M = S + A: Reconstruction (machine precision)
"""
from qig.validation import ValidationReport
# Compute components
M = self.jacobian(theta, method=method, n_points=n_points)
S = self.symmetric_part(theta, method=method, n_points=n_points)
A = self.antisymmetric_part(theta, method=method, n_points=n_points)
# Get constraint gradient (using marginal entropies constraint)
marginals = marginal_entropies(self.rho_from_theta(theta),
[self.d] * (self.n_pairs if self.pair_basis else self.n_sites))
# Gradient of constraint C(θ) = Σ h_i
# Use finite differences for gradient
eps = 1e-7
a = np.zeros(len(theta))
C0 = np.sum(marginals)
for i in range(len(theta)):
theta_plus = theta.copy()
theta_plus[i] += eps
rho_plus = self.rho_from_theta(theta_plus)
marginals_plus = marginal_entropies(rho_plus,
[self.d] * (self.n_pairs if self.pair_basis else self.n_sites))
C_plus = np.sum(marginals_plus)
a[i] = (C_plus - C0) / eps
# Entropy gradient: ∇H = -G @ theta
G = self.fisher_information(theta)
entropy_gradient = -G @ theta
# Compute degeneracy violations
S_a = S @ a
A_entropy_grad = A @ entropy_gradient
S_annihilates_constraint = np.linalg.norm(S_a)
A_annihilates_entropy_gradient = np.linalg.norm(A_entropy_grad)
# Entropy production
entropy_production = theta @ S @ theta
# Symmetry/antisymmetry errors
S_symmetric_error = np.max(np.abs(S - S.T))
A_antisymmetric_error = np.max(np.abs(A + A.T))
# Reconstruction error
reconstruction_error = np.max(np.abs(M - (S + A)))
# Check all conditions
all_passed = (
S_annihilates_constraint < tol and
A_annihilates_entropy_gradient < tol and
entropy_production >= -tol and
S_symmetric_error < 1e-14 and
A_antisymmetric_error < 1e-14 and
reconstruction_error < 1e-14
)
diagnostics = {
'S': S,
'A': A,
'constraint_gradient': a,
'entropy_gradient': entropy_gradient,
'S_annihilates_constraint': S_annihilates_constraint,
'A_annihilates_entropy_gradient': A_annihilates_entropy_gradient,
'entropy_production': entropy_production,
'S_symmetric_error': S_symmetric_error,
'A_antisymmetric_error': A_antisymmetric_error,
'reconstruction_error': reconstruction_error,
'all_passed': all_passed,
'tolerance': tol
}
return diagnostics
def _grad_psi(self, theta: np.ndarray) -> np.ndarray:
"""
Compute ∇ψ(θ), the analytical gradient of the cumulant generating function ψ(θ).
Since ψ(θ) = log Tr(exp(K(θ))) where K(θ) = ∑ θ_a F_a, differentiate directly:
∂ψ/∂θ_a = ∂/∂θ_a [log Tr(exp(K))] = [1/Tr(exp(K))] * Tr( ∂/∂θ_a exp(K) )
Since ∂/∂θ_a exp(K) = exp(K) * F_a (in the sense of matrix multiplication),
we get: ∂ψ/∂θ_a = Tr( exp(K) F_a ) / Tr(exp(K))
This computes the gradient directly from the cumulant generating function
without materializing ρ(θ) as an intermediate.
Parameters
----------
theta : ndarray
Natural parameters
Returns
-------
grad_psi : ndarray, shape (n_params,)
∇ψ(θ), the gradient of the cumulant generating function
"""
# Compute K(θ) = ∑ θ_a F_a
K = sum(theta_a * F_a for theta_a, F_a in zip(theta, self.operators))
# Shift for stability. Since exp(K) = exp(c) exp(K-cI), the scalar cancels
# in Tr(exp(K) F_a) / Tr(exp(K)).
lambda_max = float(np.max(np.linalg.eigvalsh(K).real))
K_shift = K - lambda_max * np.eye(self.D, dtype=complex)
exp_K = expm(K_shift)
# Compute normalization Z = Tr(exp(K))
Z = np.trace(exp_K)
# Compute ∇ψ(θ)_a = Tr(exp(K) F_a) / Z for each a
return np.array([np.trace(exp_K @ F_a).real / Z.real for F_a in self.operators])
[docs]
def von_neumann_entropy(self, theta: np.ndarray) -> float:
"""
Compute the von Neumann entropy.
For exponential families ρ(θ) = exp(K(θ) - ψ(θ)), we use the fundamental identity:
H(ρ) = ψ(θ) - θ^T ∇ψ(θ)
where ∇ψ(θ) is the analytical gradient of the cumulant generating function ψ(θ).
This directly uses the exponential family structure and is more numerically
stable than eigendecomposition for states within the exponential family manifold.
Parameters
----------
theta : ndarray
Natural parameters
Returns
-------
float
Von Neumann entropy in nats
"""
# Use exponential family identity: H(ρ) = ψ(θ) - θ^T ∇ψ(θ)
psi_theta = self.psi(theta)
grad_psi = self._grad_psi(theta)
entropy = psi_theta - np.dot(theta, grad_psi)
# Ensure non-negative (numerical precision issues)
return max(0.0, float(entropy.real))
[docs]
def purity(self, theta: np.ndarray) -> float:
"""
Compute purity Tr(ρ²).
Pure states: Tr(ρ²) = 1
Maximally mixed: Tr(ρ²) = 1/D
Parameters
----------
theta : ndarray
Natural parameters
Returns
-------
float
Purity, between 1/D and 1
"""
rho = self.rho_from_theta(theta)
return np.trace(rho @ rho).real
# =========================================================================
# CIP-0008: σ-parametrised regularisation infrastructure
# =========================================================================
[docs]
def validate_sigma(self, sigma: np.ndarray) -> Tuple[bool, str]:
"""
Validate that σ is a valid density matrix of correct dimension.
Parameters
----------
sigma : ndarray
Matrix to validate
Returns
-------
is_valid : bool
True if σ is a valid density matrix
message : str
"Valid" or error description
"""
D = self.D
# Check shape
if sigma.shape != (D, D):
return False, f"Shape mismatch: expected ({D}, {D}), got {sigma.shape}"
# Check Hermitian
if not np.allclose(sigma, sigma.conj().T, atol=1e-10):
return False, "σ must be Hermitian"
# Check positive semidefinite
eigvals = np.linalg.eigvalsh(sigma)
if np.any(eigvals < -1e-10):
return False, f"σ must be PSD, min eigenvalue: {eigvals.min():.2e}"
# Check unit trace
tr = np.trace(sigma).real
if not np.isclose(tr, 1.0, atol=1e-10):
return False, f"Tr(σ) must be 1, got {tr:.6f}"
return True, "Valid"
[docs]
def is_product_sigma(
self, sigma: np.ndarray, tol: float = 1e-6
) -> Tuple[bool, Optional[List[np.ndarray]]]:
"""
Check if σ has product structure σ = σ₁⊗σ₂⊗...⊗σₙ.
Parameters
----------
sigma : ndarray
Density matrix to check
tol : float
Tolerance for product structure detection
Returns
-------
is_product : bool
True if σ is (approximately) a product state
factors : list of ndarray or None
If product, the individual σₖ matrices; otherwise None
"""
if self.n_pairs == 1:
# Single pair is trivially a "product"
return True, [sigma]
# For multi-pair, check if σ can be factorised
# This is expensive in general - use partial trace to check consistency
D_pair = self.d ** 2
# Extract marginals for each pair
marginals = []
for k in range(self.n_pairs):
# Trace out all pairs except k
sigma_k = self._partial_trace_to_pair(sigma, k)
marginals.append(sigma_k)
# Reconstruct product state from marginals
sigma_product = marginals[0]
for k in range(1, self.n_pairs):
sigma_product = np.kron(sigma_product, marginals[k])
# Check if reconstruction matches
if np.allclose(sigma, sigma_product, atol=tol):
return True, marginals
return False, None
def _partial_trace_to_pair(self, sigma: np.ndarray, pair_idx: int) -> np.ndarray:
"""Trace out all pairs except pair_idx, returning d²×d² density matrix."""
D_pair = self.d ** 2
n = self.n_pairs
if n == 1:
return sigma # Nothing to trace
# Reshape to tensor with legs for each pair
# Shape: (D_pair,) * n for bra, then (D_pair,) * n for ket
sigma_tensor = sigma.reshape([D_pair] * (2 * n))
# Use einsum to trace out all pairs except pair_idx
# Build index strings: bra indices 0..n-1, ket indices n..2n-1
# Pairs to trace get same index letter; pair to keep gets distinct letters
bra_indices = []
ket_indices = []
trace_letter = ord('a')
keep_bra = chr(ord('z') - 1) # 'y'
keep_ket = chr(ord('z')) # 'z'
for k in range(n):
if k == pair_idx:
bra_indices.append(keep_bra)
ket_indices.append(keep_ket)
else:
letter = chr(trace_letter)
bra_indices.append(letter)
ket_indices.append(letter) # Same letter = trace over this pair
trace_letter += 1
input_str = ''.join(bra_indices + ket_indices)
output_str = keep_bra + keep_ket
einsum_str = f"{input_str}->{output_str}"
result = np.einsum(einsum_str, sigma_tensor)
return result.reshape(D_pair, D_pair)
[docs]
def detect_sigma_structure(self, sigma: np.ndarray) -> str:
"""
Detect structure of σ for efficiency optimisation.
Parameters
----------
sigma : ndarray
Regularisation matrix
Returns
-------
structure : str
One of: 'isotropic', 'product', 'pure', 'general'
"""
D = self.D
# Check if isotropic (I/D)
if np.allclose(sigma, np.eye(D) / D, atol=1e-10):
return 'isotropic'
# Check if product state (for multi-pair)
if self.n_pairs > 1:
is_prod, _ = self.is_product_sigma(sigma)
if is_prod:
return 'product'
# Check if rank-1 (pure state)
eigvals = np.linalg.eigvalsh(sigma)
n_nonzero = np.sum(eigvals > 1e-10)
if n_nonzero == 1:
return 'pure'
return 'general'
[docs]
def regularise_pure_state(
self,
psi: np.ndarray,
epsilon: float,
sigma: Optional[np.ndarray] = None,
) -> np.ndarray:
"""
Create regularised density matrix from pure state.
ρ_ε = (1-ε)|ψ⟩⟨ψ| + ε σ
Parameters
----------
psi : ndarray
Pure state vector (length D)
epsilon : float
Regularisation strength, must be in (0, 1)
sigma : ndarray, optional
Regularisation direction. Default: I/D (isotropic)
Returns
-------
rho_epsilon : ndarray
Regularised density matrix (D×D)
"""
D = self.D
# Validate psi
if psi.shape != (D,):
raise ValueError(f"psi must have length {D}, got {psi.shape}")
# Normalise psi if needed
norm = np.linalg.norm(psi)
if not np.isclose(norm, 1.0, atol=1e-10):
psi = psi / norm
# Validate epsilon
if not 0 < epsilon < 1:
raise ValueError(f"epsilon must be in (0, 1), got {epsilon}")
# Default sigma is isotropic
if sigma is None:
sigma = np.eye(D, dtype=complex) / D
else:
# Validate sigma
is_valid, msg = self.validate_sigma(sigma)
if not is_valid:
raise ValueError(f"Invalid sigma: {msg}")
# Create regularised state
rho_pure = np.outer(psi, psi.conj())
rho_epsilon = (1 - epsilon) * rho_pure + epsilon * sigma
return rho_epsilon
def _bell_parameters_product_sigma(
self,
eps_val: float,
log_eps: float,
sigma_per_pair: List[np.ndarray],
bell_indices: Optional[List[int]] = None,
) -> np.ndarray:
"""
Efficient θ computation for product σ = σ₁⊗...⊗σₙ.
For each pair k, the marginal is:
ρₖ_ε = (1-ε)|Φₖ⟩⟨Φₖ| + ε σₖ
We compute log(ρₖ_ε) via small d²×d² eigendecomposition,
then project onto the per-pair operators.
Complexity: O(n × d⁶) instead of O(d^(6n))
Parameters
----------
eps_val : float
Regularisation ε value
log_eps : float
log(ε) for numerical stability
sigma_per_pair : list of ndarray
List of n density matrices, each d²×d² for one pair
bell_indices : list of int, optional
Which Bell state (k=0,...,d-1) to use for each pair.
Default: all zeros (standard Bell state |Φ₀⟩).
Returns
-------
theta : ndarray
Natural parameters [θ₁, θ₂, ..., θₙ]
"""
from .pair_operators import pair_basis_generators, bell_state
d = self.d
D_pair = d ** 2
n_ops_per_pair = D_pair ** 2 - 1 # d⁴ - 1
# Get single-pair generators
single_pair_ops = pair_basis_generators(d)
# Handle bell_indices
if bell_indices is None:
bell_indices = [0] * self.n_pairs
# Compute θ for each pair
theta = np.zeros(self.n_params)
for k in range(self.n_pairs):
sigma_k = sigma_per_pair[k]
# Get Bell state for this pair (may differ per pair via bell_indices)
psi_bell_k = bell_state(d, bell_indices[k])
rho_bell_k = np.outer(psi_bell_k, psi_bell_k.conj())
# Form ρₖ_ε = (1-ε)|Φₖ⟩⟨Φₖ| + ε σₖ
rho_k_eps = (1 - eps_val) * rho_bell_k + eps_val * sigma_k
# Ensure Hermitian
rho_k_eps = (rho_k_eps + rho_k_eps.conj().T) / 2
# Compute log via eigendecomposition (small d²×d² matrix)
eigvals, U = eigh(rho_k_eps)
eigvals = np.maximum(eigvals, np.finfo(float).tiny)
log_eigvals = np.log(eigvals)
log_rho_k = U @ np.diag(log_eigvals) @ U.conj().T
# Project onto per-pair operators
# θₐ = Tr(log(ρₖ) Fₐ) / Tr(Fₐ²)
start_idx = k * n_ops_per_pair
for a, F_a in enumerate(single_pair_ops):
numerator = np.real(np.trace(log_rho_k @ F_a))
denominator = np.real(np.trace(F_a @ F_a))
if denominator > 0:
theta[start_idx + a] = numerator / denominator
return theta
[docs]
def get_bell_state_parameters(
self,
epsilon: float = 1e-6,
log_epsilon: Optional[float] = None,
sigma: Optional[np.ndarray] = None,
sigma_per_pair: Optional[List[np.ndarray]] = None,
bell_indices: Optional[List[int]] = None,
) -> np.ndarray:
"""
Get natural parameters θ corresponding to a regularised Bell state.
A Bell state is a pure state (rank 1), which lies at the boundary
of the exponential family where natural parameters θ → -∞.
For regularised state: ``ρ_ε = (1-ε)|Φ⟩⟨Φ| + ε σ``
We compute θ by solving: ``ρ_ε = exp(Σ θₐFₐ - ψ(θ))``
This gives: ``θₐ ∝ Tr(log(ρ_ε) Fₐ)``
Parameters
----------
epsilon : float, default=1e-6
Regularisation parameter.
Smaller epsilon → closer to pure Bell state (more negative θ).
Must be > 0 to avoid singularities.
log_epsilon : float, optional
If provided, overrides ``epsilon`` via log ε = log_epsilon.
This is numerically convenient when exploring very small ε.
sigma : ndarray, optional
Full D×D regularisation matrix. Any valid density matrix.
Use for entangled σ (inter-pair correlations in perturbation).
If None and sigma_per_pair is None, uses I/D (isotropic).
bell_indices : list of int, optional
Which Bell state (k=0,...,d-1) to use for each pair.
Default: all zeros (standard Bell state ``|Φ₀⟩``).
Allows exploring different pure state origins.
sigma_per_pair : list of ndarray, optional
List of n density matrices, each d²×d² for one pair.
Constructs σ = σ₁⊗σ₂⊗...⊗σₙ (product structure).
Efficient O(n) computation preserved.
Only valid for multi-pair systems (n_pairs > 1).
Returns
-------
theta : ndarray
Natural parameters for the regularized Bell state.
Many components will be large and negative (approaching -∞
for pure state).
Raises
------
ValueError
If both sigma and sigma_per_pair are provided.
If sigma_per_pair has wrong length or invalid matrices.
Notes
-----
The regularisation matrix σ encodes the "direction of approach"
to the pure-state boundary (see CIP-0008 and entropy_time_paths.ipynb):
- Different σ = different "meridians" from the north pole
- Isotropic σ = I/D gives the "boring" symmetric departure
- Anisotropic σ reveals the tangent cone of possible departures
For the exponential family ρ(θ) = exp(H(θ))/Z where H = Σ θₐFₐ,
we have: log(ρ) = H - log(Z)·I
Since Tr(Fₐ) = 0 for our operators, we get:
θₐ = Tr(log(ρ) Fₐ) / Tr(FₐFₐ)
**Multi-pair note**: For n_pairs > 1, our operator basis is the direct
sum of per-pair su(d²) algebras, NOT the full su(D) algebra. The
regularised Bell state may have components outside this subspace.
The returned θ is the projection onto our subspace, which is correct
for the inaccessible game dynamics (which stay in this subspace).
Reconstruction via ``rho_from_theta(θ)`` will NOT exactly match the
target ρ_ε, but the dynamics are correct.
"""
if not self.pair_basis:
raise ValueError("Bell states only defined for pair_basis=True")
# Validate sigma options
if sigma is not None and sigma_per_pair is not None:
raise ValueError("Cannot specify both sigma and sigma_per_pair")
# Handle sigma_per_pair for multi-pair systems (efficient O(n × d⁶) path)
if sigma_per_pair is not None:
if self.n_pairs == 1:
raise ValueError("sigma_per_pair only valid for n_pairs > 1")
if len(sigma_per_pair) != self.n_pairs:
raise ValueError(
f"sigma_per_pair must have {self.n_pairs} matrices, got {len(sigma_per_pair)}"
)
# Validate each per-pair sigma (fast: O(n × d⁶) not O(D³))
D_pair = self.d ** 2
for k, sigma_k in enumerate(sigma_per_pair):
if sigma_k.shape != (D_pair, D_pair):
raise ValueError(
f"sigma_per_pair[{k}] must be {D_pair}×{D_pair}, got {sigma_k.shape}"
)
# Quick validation (skip full eigenvalue check for speed)
if not np.allclose(sigma_k, sigma_k.conj().T, atol=1e-10):
raise ValueError(f"sigma_per_pair[{k}] must be Hermitian")
if not np.isclose(np.trace(sigma_k), 1.0, atol=1e-10):
raise ValueError(f"sigma_per_pair[{k}] must have unit trace")
# Go directly to efficient per-pair computation (skip full sigma build!)
if log_epsilon is not None:
log_eps = float(log_epsilon)
else:
log_eps = float(np.log(epsilon))
eps_val = float(np.exp(log_eps))
return self._bell_parameters_product_sigma(eps_val, log_eps, sigma_per_pair, bell_indices)
# Work with log ε directly for numerical stability near the boundary.
if log_epsilon is not None:
log_eps = float(log_epsilon)
if not np.isfinite(log_eps):
raise ValueError("log_epsilon must be a finite real number")
else:
if epsilon <= 0:
raise ValueError("epsilon must be > 0 (pure Bell state has θ → -∞)")
log_eps = float(np.log(epsilon))
# IMPORTANT:
# - For σ = I/D (isotropic), we can keep working in log-space even when
# log_eps is far below float64 underflow (≈ -708). This avoids artificial
# saturation in θ when exploring extreme log_eps.
# - For more general σ (including per-pair σₖ), we must materialize
# (1-ε)ρ + εσ in float64, so ε will underflow; those paths clamp.
D = self.D
# Get Bell state (product of n_pairs Bell states)
psi_bell = product_of_bell_states(self.n_pairs, self.d, bell_indices=bell_indices)
rho_bell = np.outer(psi_bell, psi_bell.conj())
# Determine sigma structure and compute log(ρ_ε) accordingly
if sigma is None:
# Isotropic case: σ = I/D (efficient analytic formula)
sigma_structure = 'isotropic'
else:
# Validate sigma
is_valid, msg = self.validate_sigma(sigma)
if not is_valid:
raise ValueError(f"Invalid sigma: {msg}")
sigma_structure = self.detect_sigma_structure(sigma)
if sigma_structure == 'isotropic':
# Efficient analytic computation for σ = I/D
# ρ_ε = (1-ε)|Ψ⟩⟨Ψ| + ε I/D has eigenvalues:
# λ₀ = 1 - ε + ε/D (the Bell state direction)
# λ_⊥ = ε/D (all D-1 orthogonal directions)
# This works for any n_pairs!
eigvals_bell, U = eigh(rho_bell)
idx_max = int(np.argmax(eigvals_bell.real))
# ε may underflow for very negative log_eps; in that limit λ₀→1 so
# log(λ₀)→0, while log(λ_⊥)=log_eps-log(D) remains well-defined.
min_log_eps = float(np.log(np.finfo(float).tiny))
eps_val = float(np.exp(log_eps)) if log_eps >= min_log_eps else 0.0
factor = eps_val * (1.0 - 1.0 / D)
log_lambda0 = np.log1p(-factor) if factor < 1.0 else np.log(np.finfo(float).tiny)
log_lambda_perp = log_eps - np.log(D)
log_diag = np.full(D, log_lambda_perp, dtype=float)
log_diag[idx_max] = log_lambda0
log_rho = U @ np.diag(log_diag) @ U.conj().T
elif sigma_structure == 'product' and self.n_pairs > 1:
# Product sigma: efficient per-pair computation!
# For σ = σ₁⊗...⊗σₙ and |Ψ⟩ = |Φ⟩⊗...⊗|Φ⟩:
# The marginal on pair k is: ρₖ_ε = (1-ε)|Φ⟩⟨Φ| + ε σₖ
# We compute θ^(k) from log(ρₖ_ε) directly.
# Complexity: O(n × d⁶) instead of O(d^(6n))
# If sigma_per_pair wasn't provided, extract marginals from sigma
if sigma_per_pair is None:
sigma_per_pair = [
self._partial_trace_to_pair(sigma, k)
for k in range(self.n_pairs)
]
# Need ε in float64 arithmetic in the per-pair path; clamp to avoid
# underflow producing singular logs.
min_log_eps = float(np.log(np.finfo(float).tiny))
log_eps_clamped = max(log_eps, min_log_eps)
eps_val = float(np.exp(log_eps_clamped))
return self._bell_parameters_product_sigma(
eps_val, log_eps, sigma_per_pair, bell_indices
)
else:
# General case: compute ρ_ε and take matrix log
# This is O(D³) but handles arbitrary σ
import warnings
if sigma_structure == 'general' and self.n_pairs > 1:
warnings.warn(
f"Using general σ (structure: {sigma_structure}) with {self.n_pairs} pairs. "
"This loses O(n) efficiency.",
UserWarning
)
# Need ε in float64 arithmetic; clamp to avoid underflow producing
# a singular pure state and -inf in the log.
min_log_eps = float(np.log(np.finfo(float).tiny))
log_eps_clamped = max(log_eps, min_log_eps)
eps_val = float(np.exp(log_eps_clamped))
rho_eps = (1 - eps_val) * rho_bell + eps_val * sigma
# Ensure Hermitian and compute log via eigendecomposition
rho_eps = (rho_eps + rho_eps.conj().T) / 2
eigvals, U = eigh(rho_eps)
# Clamp small eigenvalues for numerical stability
eigvals = np.maximum(eigvals, np.finfo(float).tiny)
log_eigvals = np.log(eigvals)
log_rho = U @ np.diag(log_eigvals) @ U.conj().T
# Extract natural parameters by projection onto operator basis
# θₐ = Tr(log(ρ) Fₐ) / Tr(FₐFₐ)
theta = np.zeros(self.n_params)
for a, F_a in enumerate(self.operators):
numerator = np.real(np.trace(log_rho @ F_a))
denominator = np.real(np.trace(F_a @ F_a))
if denominator > 0:
theta[a] = numerator / denominator
else:
theta[a] = 0.0
return theta
__all__ = [
"pauli_basis",
"gell_mann_matrices",
"qutrit_basis",
"create_operator_basis",
"QuantumExponentialFamily",
]