Skip to main content
← OpenMECP Documentation

omecp/
geometry.rs

1//! Core geometry and state data structures for molecular representations.
2//!
3//! This module provides the fundamental data types for representing molecular
4//! geometries and electronic states in MECP calculations. It includes:
5//!
6//! - [`Geometry`]: Molecular structure with element types and Cartesian coordinates
7//! - [`State`]: Electronic state with energy, forces, and associated geometry
8//!
9//! # Unit Conventions
10//!
11//! - **Coordinates**: Stored in Angstrom (Å) - the standard unit for molecular geometry
12//! - **Forces/Gradients**: Stored in Hartree/Angstrom (Ha/Å) - converted from native QM output
13//! - **Energy**: Stored in Hartree (Ha)
14
15use nalgebra::DVector;
16
17/// Unit conversion constants for coordinate systems
18const BOHR_TO_ANGSTROM: f64 = 0.5291772489;
19const ANGSTROM_TO_BOHR: f64 = 1.8897259886;
20
21/// Convert coordinates from Angstroms to Bohrs
22pub fn angstrom_to_bohr(coords: &DVector<f64>) -> DVector<f64> {
23    coords * ANGSTROM_TO_BOHR
24}
25
26/// Convert coordinates from Bohrs to Angstroms
27pub fn bohr_to_angstrom(coords: &DVector<f64>) -> DVector<f64> {
28    coords * BOHR_TO_ANGSTROM
29}
30
31/// Convert forces from Hartree/Bohr to Hartree/Angstrom.
32///
33/// Forces have units of energy/length. To convert from Ha/Bohr to Ha/Å:
34/// ```text
35/// F(Ha/Å) = F(Ha/Bohr) × (1 Bohr / 0.529177 Å) = F(Ha/Bohr) × ANGSTROM_TO_BOHR
36/// ```
37///
38/// This ensures forces are consistent with coordinates stored in Angstrom.
39pub fn forces_bohr_to_angstrom(forces: &DVector<f64>) -> DVector<f64> {
40    forces * ANGSTROM_TO_BOHR
41}
42
43/// Represents a molecular geometry with atomic elements and Cartesian coordinates.
44///
45/// The `Geometry` struct stores the fundamental information about a molecular
46/// structure: the chemical elements of each atom and their 3D positions in
47/// Cartesian coordinates. It uses a flat representation where coordinates are
48/// stored as a single-dimensional vector in the order [x1, y1, z1, x2, y2, z2, ...].
49///
50/// # Coordinate System
51///
52/// - Units: Angstrom (Å) - standard molecular geometry unit
53/// - Coordinate frame: Cartesian (x, y, z)
54/// - Origin: Arbitrary (typically centered for convenience)
55///
56/// # Storage Format
57///
58/// Coordinates are stored in a `DVector<f64>` for efficient linear algebra operations.
59/// The flat representation enables direct use with nalgebra for matrix operations
60/// commonly required in geometry optimization.
61///
62/// # Examples
63///
64/// ```
65/// use omecp::geometry::Geometry;
66///
67/// // Create a water molecule geometry (coordinates in Angstrom)
68/// let elements = vec![
69///     "O".to_string(),
70///     "H".to_string(),
71///     "H".to_string(),
72/// ];
73/// let coords = vec![
74///     0.0, 0.0, 0.0,        // O at origin
75///     0.757, 0.586, 0.0,    // H1 (Angstrom)
76///     -0.757, 0.586, 0.0,   // H2 (Angstrom)
77/// ];
78///
79/// let geometry = Geometry::new(elements, coords);
80/// assert_eq!(geometry.num_atoms, 3);
81///
82/// // Get coordinates of atom 0 (oxygen)
83/// let oxygen_coords = geometry.get_atom_coords(0);
84/// println!("Oxygen position: ({:.3}, {:.3}, {:.3}) Angstrom",
85///          oxygen_coords[0], oxygen_coords[1], oxygen_coords[2]);
86/// ```
87#[derive(Debug, Clone)]
88pub struct Geometry {
89    /// Chemical element symbols for each atom in order
90    pub elements: Vec<String>,
91    /// Flattened Cartesian coordinates [x1, y1, z1, x2, y2, z2, ...] in Angstrom (Å)
92    pub coords: DVector<f64>,
93    /// Number of atoms in the molecule
94    pub num_atoms: usize,
95}
96
97impl Geometry {
98    /// Create a new `Geometry` from element list and coordinate vector.
99    ///
100    /// # Arguments
101    ///
102    /// * `elements` - Vector of element symbols (e.g., "C", "H", "O")
103    /// * `coords` - Flattened coordinate vector of length 3 × num_atoms
104    ///
105    /// # Panics
106    ///
107    /// Panics if `coords.len() != elements.len() * 3`, ensuring data consistency.
108    ///
109    /// # Examples
110    ///
111    /// ```
112    /// use omecp::geometry::Geometry;
113    ///
114    /// let elements = vec!["C".to_string(), "H".to_string()];
115    /// let coords = vec![0.0, 0.0, 0.0, 1.0, 0.0, 0.0];
116    /// let geometry = Geometry::new(elements, coords);
117    /// ```
118    pub fn new(elements: Vec<String>, coords: Vec<f64>) -> Self {
119        let num_atoms = elements.len();
120        assert_eq!(coords.len(), num_atoms * 3);
121        Self {
122            elements,
123            coords: DVector::from_vec(coords),
124            num_atoms,
125        }
126    }
127
128    /// Get the Cartesian coordinates of a specific atom.
129    ///
130    /// # Arguments
131    ///
132    /// * `atom_idx` - Zero-based index of the atom (0 = first atom)
133    ///
134    /// # Returns
135    ///
136    /// Array of three coordinates [x, y, z] in Angstroms.
137    ///
138    /// # Examples
139    ///
140    /// ```
141    /// use omecp::geometry::Geometry;
142    ///
143    /// let elements = vec!["O".to_string(), "H".to_string(), "H".to_string()];
144    /// let coords = vec![0.0, 0.0, 0.0, 0.757, 0.586, 0.0, -0.757, 0.586, 0.0];
145    /// let geometry = Geometry::new(elements, coords);
146    ///
147    /// // Get oxygen coordinates
148    /// let o_coords = geometry.get_atom_coords(0);
149    /// assert_eq!(o_coords, [0.0, 0.0, 0.0]);
150    ///
151    /// // Get first hydrogen coordinates
152    /// let h1_coords = geometry.get_atom_coords(1);
153    /// assert_eq!(h1_coords, [0.757, 0.586, 0.0]);
154    /// ```
155    pub fn get_atom_coords(&self, atom_idx: usize) -> [f64; 3] {
156        let i = atom_idx * 3;
157        [self.coords[i], self.coords[i + 1], self.coords[i + 2]]
158    }
159}
160
161/// Represents an electronic state of a molecule with energy, forces, and geometry.
162///
163/// The `State` struct encapsulates all information about a single electronic state
164/// needed for MECP optimization: the potential energy, the gradient (forces), and
165/// the molecular geometry at which these were evaluated.
166///
167/// In the MECP algorithm, we track two states simultaneously (typically called
168/// "state A" and "state B") and seek the geometry where their energies are equal
169/// while minimizing the perpendicular gradient component.
170///
171/// # Unit Conventions
172///
173/// - **Energy**: Hartree (Ha) - atomic unit of energy
174/// - **Forces/Gradients**: Hartree/Angstrom (Ha/Å) - converted from native QM output
175/// - **Geometry coordinates**: Angstrom (Å) - see [`Geometry`] documentation
176///
177/// # Force Convention
178///
179/// Forces are the negative gradient of the energy with respect to nuclear positions:
180/// ```text
181/// F = -∇E
182/// ```
183/// This is the standard convention in quantum chemistry, where positive forces
184/// point in the direction of decreasing energy. Forces are converted from the
185/// native QM program output (Ha/Bohr) to Ha/Å for consistency with coordinates.
186///
187#[derive(Debug, Clone)]
188pub struct State {
189    /// Potential energy of the state in Hartree (Ha)
190    pub energy: f64,
191    /// Forces (negative gradient) in Hartree/Angstrom (Ha/Å)
192    /// Stored as a flat vector matching Geometry::coords format
193    pub forces: DVector<f64>,
194    /// Molecular geometry at which energy and forces were evaluated (coordinates in Angstrom)
195    pub geometry: Geometry,
196}
197impl State {
198    /// Validates that the State contains meaningful data and is not a zero-energy failure.
199    ///
200    /// This method checks for common failure modes where QM calculations appear to
201    /// succeed but actually return invalid or default values. It prevents silent
202    /// failures that could lead to incorrect MECP optimization results.
203    ///
204    /// # Validation Criteria
205    ///
206    /// The State is considered invalid if:
207    /// - Energy is exactly zero (indicates parsing failure or uninitialized state)
208    /// - All force components are zero (indicates gradient calculation failure)
209    /// - Forces vector is empty (indicates missing gradient data)
210    ///
211    /// # Returns
212    ///
213    /// - `Ok(())` if the State contains valid, meaningful data
214    /// - `Err(String)` with a descriptive error message if validation fails
215    ///
216    /// # Examples
217    ///
218    /// ```
219    /// use omecp::geometry::{Geometry, State};
220    /// use nalgebra::DVector;
221    ///
222    /// // Valid state with non-zero energy and forces
223    /// let geometry = Geometry::new(vec!["H".to_string()], vec![0.0, 0.0, 0.0]);
224    /// let valid_state = State {
225    ///     energy: -0.5,
226    ///     forces: DVector::from_vec(vec![0.1, -0.2, 0.0]),
227    ///     geometry,
228    /// };
229    /// assert!(valid_state.validate().is_ok());
230    ///
231    /// // Invalid state with zero energy
232    /// let geometry = Geometry::new(vec!["H".to_string()], vec![0.0, 0.0, 0.0]);
233    /// let invalid_state = State {
234    ///     energy: 0.0,
235    ///     forces: DVector::from_vec(vec![0.1, -0.2, 0.0]),
236    ///     geometry,
237    /// };
238    /// assert!(invalid_state.validate().is_err());
239    /// ```
240    pub fn validate(&self) -> Result<(), String> {
241        // Check for zero energy (common failure mode)
242        if self.energy == 0.0 {
243            return Err(
244                "State contains zero energy, indicating parsing failure or uninitialized state"
245                    .to_string(),
246            );
247        }
248
249        // Check for empty forces vector
250        if self.forces.is_empty() {
251            return Err(
252                "State contains empty forces vector, indicating missing gradient data".to_string(),
253            );
254        }
255
256        // Check for all-zero forces (gradient calculation failure)
257        if self.forces.iter().all(|&f| f == 0.0) {
258            return Err(
259                "State contains all-zero forces, indicating gradient calculation failure"
260                    .to_string(),
261            );
262        }
263
264        // Check force/geometry consistency
265        let expected_force_components = self.geometry.num_atoms * 3;
266        if self.forces.len() != expected_force_components {
267            return Err(format!(
268                "Force/geometry mismatch: expected {} force components for {} atoms, got {}",
269                expected_force_components,
270                self.geometry.num_atoms,
271                self.forces.len()
272            ));
273        }
274
275        Ok(())
276    }
277}