omecp/geometry.rs
1//! Core geometry and state data structures for molecular representations.
2//!
3//! This module provides the fundamental data types for representing molecular
4//! geometries and electronic states in MECP calculations. It includes:
5//!
6//! - [`Geometry`]: Molecular structure with element types and Cartesian coordinates
7//! - [`State`]: Electronic state with energy, forces, and associated geometry
8//!
9//! # Unit Conventions
10//!
11//! - **Coordinates**: Stored in Angstrom (Å) - the standard unit for molecular geometry
12//! - **Forces/Gradients**: Stored in Hartree/Angstrom (Ha/Å) - converted from native QM output
13//! - **Energy**: Stored in Hartree (Ha)
14
15use nalgebra::DVector;
16
17/// Unit conversion constants for coordinate systems
18const BOHR_TO_ANGSTROM: f64 = 0.5291772489;
19const ANGSTROM_TO_BOHR: f64 = 1.8897259886;
20
21/// Convert coordinates from Angstroms to Bohrs
22pub fn angstrom_to_bohr(coords: &DVector<f64>) -> DVector<f64> {
23 coords * ANGSTROM_TO_BOHR
24}
25
26/// Convert coordinates from Bohrs to Angstroms
27pub fn bohr_to_angstrom(coords: &DVector<f64>) -> DVector<f64> {
28 coords * BOHR_TO_ANGSTROM
29}
30
31/// Convert forces from Hartree/Bohr to Hartree/Angstrom.
32///
33/// Forces have units of energy/length. To convert from Ha/Bohr to Ha/Å:
34/// ```text
35/// F(Ha/Å) = F(Ha/Bohr) × (1 Bohr / 0.529177 Å) = F(Ha/Bohr) × ANGSTROM_TO_BOHR
36/// ```
37///
38/// This ensures forces are consistent with coordinates stored in Angstrom.
39pub fn forces_bohr_to_angstrom(forces: &DVector<f64>) -> DVector<f64> {
40 forces * ANGSTROM_TO_BOHR
41}
42
43/// Represents a molecular geometry with atomic elements and Cartesian coordinates.
44///
45/// The `Geometry` struct stores the fundamental information about a molecular
46/// structure: the chemical elements of each atom and their 3D positions in
47/// Cartesian coordinates. It uses a flat representation where coordinates are
48/// stored as a single-dimensional vector in the order [x1, y1, z1, x2, y2, z2, ...].
49///
50/// # Coordinate System
51///
52/// - Units: Angstrom (Å) - standard molecular geometry unit
53/// - Coordinate frame: Cartesian (x, y, z)
54/// - Origin: Arbitrary (typically centered for convenience)
55///
56/// # Storage Format
57///
58/// Coordinates are stored in a `DVector<f64>` for efficient linear algebra operations.
59/// The flat representation enables direct use with nalgebra for matrix operations
60/// commonly required in geometry optimization.
61///
62/// # Examples
63///
64/// ```
65/// use omecp::geometry::Geometry;
66///
67/// // Create a water molecule geometry (coordinates in Angstrom)
68/// let elements = vec![
69/// "O".to_string(),
70/// "H".to_string(),
71/// "H".to_string(),
72/// ];
73/// let coords = vec![
74/// 0.0, 0.0, 0.0, // O at origin
75/// 0.757, 0.586, 0.0, // H1 (Angstrom)
76/// -0.757, 0.586, 0.0, // H2 (Angstrom)
77/// ];
78///
79/// let geometry = Geometry::new(elements, coords);
80/// assert_eq!(geometry.num_atoms, 3);
81///
82/// // Get coordinates of atom 0 (oxygen)
83/// let oxygen_coords = geometry.get_atom_coords(0);
84/// println!("Oxygen position: ({:.3}, {:.3}, {:.3}) Angstrom",
85/// oxygen_coords[0], oxygen_coords[1], oxygen_coords[2]);
86/// ```
87#[derive(Debug, Clone)]
88pub struct Geometry {
89 /// Chemical element symbols for each atom in order
90 pub elements: Vec<String>,
91 /// Flattened Cartesian coordinates [x1, y1, z1, x2, y2, z2, ...] in Angstrom (Å)
92 pub coords: DVector<f64>,
93 /// Number of atoms in the molecule
94 pub num_atoms: usize,
95}
96
97impl Geometry {
98 /// Create a new `Geometry` from element list and coordinate vector.
99 ///
100 /// # Arguments
101 ///
102 /// * `elements` - Vector of element symbols (e.g., "C", "H", "O")
103 /// * `coords` - Flattened coordinate vector of length 3 × num_atoms
104 ///
105 /// # Panics
106 ///
107 /// Panics if `coords.len() != elements.len() * 3`, ensuring data consistency.
108 ///
109 /// # Examples
110 ///
111 /// ```
112 /// use omecp::geometry::Geometry;
113 ///
114 /// let elements = vec!["C".to_string(), "H".to_string()];
115 /// let coords = vec![0.0, 0.0, 0.0, 1.0, 0.0, 0.0];
116 /// let geometry = Geometry::new(elements, coords);
117 /// ```
118 pub fn new(elements: Vec<String>, coords: Vec<f64>) -> Self {
119 let num_atoms = elements.len();
120 assert_eq!(coords.len(), num_atoms * 3);
121 Self {
122 elements,
123 coords: DVector::from_vec(coords),
124 num_atoms,
125 }
126 }
127
128 /// Get the Cartesian coordinates of a specific atom.
129 ///
130 /// # Arguments
131 ///
132 /// * `atom_idx` - Zero-based index of the atom (0 = first atom)
133 ///
134 /// # Returns
135 ///
136 /// Array of three coordinates [x, y, z] in Angstroms.
137 ///
138 /// # Examples
139 ///
140 /// ```
141 /// use omecp::geometry::Geometry;
142 ///
143 /// let elements = vec!["O".to_string(), "H".to_string(), "H".to_string()];
144 /// let coords = vec![0.0, 0.0, 0.0, 0.757, 0.586, 0.0, -0.757, 0.586, 0.0];
145 /// let geometry = Geometry::new(elements, coords);
146 ///
147 /// // Get oxygen coordinates
148 /// let o_coords = geometry.get_atom_coords(0);
149 /// assert_eq!(o_coords, [0.0, 0.0, 0.0]);
150 ///
151 /// // Get first hydrogen coordinates
152 /// let h1_coords = geometry.get_atom_coords(1);
153 /// assert_eq!(h1_coords, [0.757, 0.586, 0.0]);
154 /// ```
155 pub fn get_atom_coords(&self, atom_idx: usize) -> [f64; 3] {
156 let i = atom_idx * 3;
157 [self.coords[i], self.coords[i + 1], self.coords[i + 2]]
158 }
159}
160
161/// Represents an electronic state of a molecule with energy, forces, and geometry.
162///
163/// The `State` struct encapsulates all information about a single electronic state
164/// needed for MECP optimization: the potential energy, the gradient (forces), and
165/// the molecular geometry at which these were evaluated.
166///
167/// In the MECP algorithm, we track two states simultaneously (typically called
168/// "state A" and "state B") and seek the geometry where their energies are equal
169/// while minimizing the perpendicular gradient component.
170///
171/// # Unit Conventions
172///
173/// - **Energy**: Hartree (Ha) - atomic unit of energy
174/// - **Forces/Gradients**: Hartree/Angstrom (Ha/Å) - converted from native QM output
175/// - **Geometry coordinates**: Angstrom (Å) - see [`Geometry`] documentation
176///
177/// # Force Convention
178///
179/// Forces are the negative gradient of the energy with respect to nuclear positions:
180/// ```text
181/// F = -∇E
182/// ```
183/// This is the standard convention in quantum chemistry, where positive forces
184/// point in the direction of decreasing energy. Forces are converted from the
185/// native QM program output (Ha/Bohr) to Ha/Å for consistency with coordinates.
186///
187#[derive(Debug, Clone)]
188pub struct State {
189 /// Potential energy of the state in Hartree (Ha)
190 pub energy: f64,
191 /// Forces (negative gradient) in Hartree/Angstrom (Ha/Å)
192 /// Stored as a flat vector matching Geometry::coords format
193 pub forces: DVector<f64>,
194 /// Molecular geometry at which energy and forces were evaluated (coordinates in Angstrom)
195 pub geometry: Geometry,
196}
197impl State {
198 /// Validates that the State contains meaningful data and is not a zero-energy failure.
199 ///
200 /// This method checks for common failure modes where QM calculations appear to
201 /// succeed but actually return invalid or default values. It prevents silent
202 /// failures that could lead to incorrect MECP optimization results.
203 ///
204 /// # Validation Criteria
205 ///
206 /// The State is considered invalid if:
207 /// - Energy is exactly zero (indicates parsing failure or uninitialized state)
208 /// - All force components are zero (indicates gradient calculation failure)
209 /// - Forces vector is empty (indicates missing gradient data)
210 ///
211 /// # Returns
212 ///
213 /// - `Ok(())` if the State contains valid, meaningful data
214 /// - `Err(String)` with a descriptive error message if validation fails
215 ///
216 /// # Examples
217 ///
218 /// ```
219 /// use omecp::geometry::{Geometry, State};
220 /// use nalgebra::DVector;
221 ///
222 /// // Valid state with non-zero energy and forces
223 /// let geometry = Geometry::new(vec!["H".to_string()], vec![0.0, 0.0, 0.0]);
224 /// let valid_state = State {
225 /// energy: -0.5,
226 /// forces: DVector::from_vec(vec![0.1, -0.2, 0.0]),
227 /// geometry,
228 /// };
229 /// assert!(valid_state.validate().is_ok());
230 ///
231 /// // Invalid state with zero energy
232 /// let geometry = Geometry::new(vec!["H".to_string()], vec![0.0, 0.0, 0.0]);
233 /// let invalid_state = State {
234 /// energy: 0.0,
235 /// forces: DVector::from_vec(vec![0.1, -0.2, 0.0]),
236 /// geometry,
237 /// };
238 /// assert!(invalid_state.validate().is_err());
239 /// ```
240 pub fn validate(&self) -> Result<(), String> {
241 // Check for zero energy (common failure mode)
242 if self.energy == 0.0 {
243 return Err(
244 "State contains zero energy, indicating parsing failure or uninitialized state"
245 .to_string(),
246 );
247 }
248
249 // Check for empty forces vector
250 if self.forces.is_empty() {
251 return Err(
252 "State contains empty forces vector, indicating missing gradient data".to_string(),
253 );
254 }
255
256 // Check for all-zero forces (gradient calculation failure)
257 if self.forces.iter().all(|&f| f == 0.0) {
258 return Err(
259 "State contains all-zero forces, indicating gradient calculation failure"
260 .to_string(),
261 );
262 }
263
264 // Check force/geometry consistency
265 let expected_force_components = self.geometry.num_atoms * 3;
266 if self.forces.len() != expected_force_components {
267 return Err(format!(
268 "Force/geometry mismatch: expected {} force components for {} atoms, got {}",
269 expected_force_components,
270 self.geometry.num_atoms,
271 self.forces.len()
272 ));
273 }
274
275 Ok(())
276 }
277}