Skip to main content
← OpenMECP Documentation

omecp/
checkpoint.rs

1//! Checkpoint system for saving and restarting MECP calculations.
2//!
3//! This module provides functionality to save the complete state of a MECP
4//! optimization calculation and restart from that state later. This is essential
5//! for:
6//!
7//! - **Long calculations**: Save progress periodically
8//! - **Interrupted jobs**: Restart from the last checkpoint
9//! - **Parameter testing**: Resume with different settings
10//! - **Convergence recovery**: Restart from near-converged geometries
11//!
12//! # Checkpoint Format
13//!
14//! Checkpoints are stored as JSON files containing:
15//!
16//! - **Step number**: Current optimization step
17//! - **Geometry**: Current molecular geometry
18//! - **Optimization history**: Geometries, gradients, Hessians (for DIIS)
19//! - **Hessian matrix**: Current approximate Hessian
20//! - **Configuration**: Complete calculation settings
21//!
22//! # Serialization Strategy
23//!
24//! The checkpoint system uses serde for serialization. Since some types
25//! (DVector, DMatrix) from nalgebra don't directly serialize, wrapper types
26//! are used:
27//!
28//! - `SerializableGeometry`: Converts DVector coords to `Vec<f64>`
29//! - `SerializableOptimizationState`: Converts collections of DVector/DMatrix to Vec
30//!
31//! # Usage
32//!
33//! Checkpoints are automatically created during optimization but can also be
34//! used manually:
35//!
36//! ```no_run
37//! use omecp::checkpoint::{Checkpoint, CheckpointLoad};
38//! use omecp::geometry::Geometry;
39//! use omecp::optimizer::OptimizationState;
40//! use omecp::config::Config;
41//! use nalgebra::{DMatrix, DVector};
42//! use std::path::Path;
43//!
44//! fn main() -> Result<(), Box<dyn std::error::Error>> {
45//!     // Create test data
46//!     let elements = vec!["H".to_string()];
47//!     let coords = vec![0.0, 0.0, 0.0];
48//!     let geometry = Geometry::new(elements, coords);
49//!     let x_old = DVector::from_vec(vec![0.0, 0.0, 0.0]);
50//!     let hessian = DMatrix::identity(3, 3);
51//!     let opt_state = OptimizationState::new(5);
52//!     let config = Config::default();
53//!
54//!     // Save checkpoint
55//!     let checkpoint = Checkpoint::new(1, &geometry, &x_old, &hessian, &opt_state, &config);
56//!     checkpoint.save(Path::new("mecp.chk"))?;
57//!
58//!     // Load checkpoint
59//!     let loaded: CheckpointLoad = Checkpoint::load(Path::new("mecp.chk"))?;
60//!     println!("Loaded step: {}", loaded.step);
61//!
62//!     std::fs::remove_file("mecp.chk")?;
63//!     Ok(())
64//! }
65//! ```
66//!
67//! # File Location
68//!
69//! Checkpoint files are typically saved as `mecp.chk` or a user-specified name
70//! in the `running_dir/` directory for easy management.
71
72//! Checkpoint system for restarting MECP optimizations.
73//!
74//! This module provides functionality to save and restore optimization state
75//! including geometry, gradients, Hessian, and history for recovery.
76
77use crate::config::Config;
78use crate::geometry::Geometry;
79use crate::optimizer::OptimizationState;
80use nalgebra::{DMatrix, DVector};
81use serde::{Deserialize, Serialize};
82use std::fs;
83use std::path::Path;
84
85/// Default checkpoint version for backward compatibility.
86///
87/// Version history:
88/// - Version 1: Legacy format with coordinates in Angstrom (original)
89/// - Version 2: Legacy format with coordinates in Bohr (intermediate)
90/// - Version 3: Current format with coordinates in Angstrom (unit standardization)
91///
92/// The default is version 1 for backward compatibility when loading old checkpoints
93/// that don't have a version field.
94fn default_version() -> u32 {
95    1
96}
97
98// Serializable versions of structs containing nalgebra types
99
100/// Serializable wrapper for Geometry.
101///
102/// Since `Geometry` contains a `DVector<f64>` which doesn't directly serialize,
103/// this wrapper converts the vector to a plain `Vec<f64>` for JSON serialization.
104#[derive(Serialize, Deserialize)]
105pub struct SerializableGeometry {
106    /// Chemical element symbols
107    elements: Vec<String>,
108    /// Flattened coordinates as `Vec<f64>`
109    coords: Vec<f64>,
110    /// Number of atoms
111    num_atoms: usize,
112}
113
114impl From<&Geometry> for SerializableGeometry {
115    fn from(geom: &Geometry) -> Self {
116        Self {
117            elements: geom.elements.clone(),
118            coords: geom.coords.data.as_vec().clone(),
119            num_atoms: geom.num_atoms,
120        }
121    }
122}
123
124impl From<SerializableGeometry> for Geometry {
125    fn from(ser_geom: SerializableGeometry) -> Self {
126        Geometry::new(ser_geom.elements, ser_geom.coords)
127    }
128}
129
130/// Serializable wrapper for OptimizationState.
131///
132/// Converts collections of nalgebra types (DVector, DMatrix) to plain Vec
133/// types for JSON serialization. Preserves the optimization history needed
134/// for DIIS convergence acceleration.
135#[derive(Serialize, Deserialize)]
136pub struct SerializableOptimizationState {
137    /// Lagrange multipliers for constraints
138    lambdas: Vec<f64>,
139    /// Lagrange multiplier for energy difference constraint (FixDE mode)
140    lambda_de: Option<f64>,
141    /// Current constraint violations for extended gradient
142    constraint_violations: Vec<f64>,
143    /// History of geometries (for DIIS)
144    geom_history: Vec<Vec<f64>>,
145    /// History of gradients (for DIIS)
146    grad_history: Vec<Vec<f64>>,
147    /// History of Hessian matrices (for BFGS updates)
148    hess_history: Vec<Vec<Vec<f64>>>,
149    /// Maximum history size to maintain
150    max_history: usize,
151}
152
153impl From<&OptimizationState> for SerializableOptimizationState {
154    fn from(opt_state: &OptimizationState) -> Self {
155        Self {
156            lambdas: opt_state.lambdas.clone(),
157            lambda_de: opt_state.lambda_de,
158            constraint_violations: opt_state.constraint_violations.data.as_vec().clone(),
159            geom_history: opt_state
160                .geom_history
161                .iter()
162                .map(|v| v.data.as_vec().clone())
163                .collect(),
164            grad_history: opt_state
165                .grad_history
166                .iter()
167                .map(|v| v.data.as_vec().clone())
168                .collect(),
169            hess_history: opt_state
170                .hess_history
171                .iter()
172                .map(|m| {
173                    m.row_iter()
174                        .map(|row| row.iter().cloned().collect())
175                        .collect()
176                })
177                .collect(),
178            max_history: opt_state.max_history,
179        }
180    }
181}
182
183impl From<SerializableOptimizationState> for OptimizationState {
184    fn from(ser_opt_state: SerializableOptimizationState) -> Self {
185        // Use max_history from serialized state to preserve the original configuration
186        let mut opt_state = OptimizationState::new(ser_opt_state.max_history);
187        opt_state.lambdas = ser_opt_state.lambdas;
188        opt_state.lambda_de = ser_opt_state.lambda_de;
189        opt_state.constraint_violations = DVector::from_vec(ser_opt_state.constraint_violations);
190        // max_history is already set in new()
191
192        for coords in ser_opt_state.geom_history {
193            opt_state.geom_history.push_back(DVector::from_vec(coords));
194        }
195        for grad in ser_opt_state.grad_history {
196            opt_state.grad_history.push_back(DVector::from_vec(grad));
197        }
198        for hess in ser_opt_state.hess_history {
199            let nrows = hess.len();
200            let ncols = hess[0].len();
201            let flat: Vec<f64> = hess.into_iter().flatten().collect();
202            opt_state
203                .hess_history
204                .push_back(DMatrix::from_row_slice(nrows, ncols, &flat));
205        }
206
207        opt_state
208    }
209}
210
211/// Checkpoint structure for saving/restoring MECP calculations.
212///
213/// A checkpoint contains all information needed to continue an optimization
214/// from a specific step, including the current geometry, optimization history,
215/// and calculation configuration.
216///
217/// # Unit Conventions
218///
219/// As of version 3 (unit standardization), all coordinates are stored in Angstrom (Å):
220/// - `geometry.coords`: Molecular coordinates in Angstrom
221/// - `x_old`: Previous geometry coordinates in Angstrom
222/// - `opt_state.geom_history`: Geometry history in Angstrom
223///
224/// Gradients are stored in Hartree/Angstrom (Ha/Å):
225/// - `opt_state.grad_history`: Gradient history in Ha/Å
226///
227/// # Version History
228///
229/// - Version 1: Original format with coordinates in Angstrom
230/// - Version 2: Intermediate format with coordinates in Bohr (deprecated)
231/// - Version 3: Current format with coordinates in Angstrom (unit standardization)
232///
233/// **Validates: Requirement 1.5**
234#[derive(Serialize, Deserialize)]
235pub struct Checkpoint {
236    /// Checkpoint format version.
237    ///
238    /// - Version 1: Coordinates in Angstrom (legacy)
239    /// - Version 2: Coordinates in Bohr (legacy, deprecated)
240    /// - Version 3: Coordinates in Angstrom (current, unit standardization)
241    #[serde(default = "default_version")]
242    pub version: u32,
243    /// Current optimization step number
244    pub step: usize,
245    /// Current molecular geometry
246    pub geometry: SerializableGeometry,
247    /// Previous geometry (for gradient computation)
248    pub x_old: Vec<f64>,
249    /// Current approximate Hessian matrix
250    pub hessian: Vec<Vec<f64>>,
251    /// Optimization state with history
252    pub opt_state: SerializableOptimizationState,
253    /// Complete calculation configuration
254    pub config: Config,
255}
256
257/// Loaded checkpoint contents returned by `Checkpoint::load`.
258///
259/// This struct contains the fully reconstructed runtime types converted from the
260/// serialized `Checkpoint` representation. It includes everything required to
261/// resume an optimization: the step counter, the molecular geometry, previous
262/// coordinates used for gradient differences, the current approximate Hessian,
263/// the optimization state (with DIIS/history structures), and the calculation
264/// configuration.
265pub struct CheckpointLoad {
266    /// Optimization step number at the time of saving.
267    pub step: usize,
268    /// Molecular geometry reconstructed from the checkpoint.
269    pub geometry: Geometry,
270    /// Previous geometry coordinates (flattened) used for finite differences.
271    pub x_old: DVector<f64>,
272    /// Hessian matrix (approximate) reconstructed as a nalgebra `DMatrix`.
273    pub hessian: DMatrix<f64>,
274    /// Optimization state including history, lambdas, and other runtime data.
275    pub opt_state: OptimizationState,
276    /// Calculation configuration used when the checkpoint was created.
277    pub config: Config,
278}
279
280impl Checkpoint {
281    /// Create a new checkpoint from current optimization state.
282    ///
283    /// # Arguments
284    ///
285    /// * `step` - Current optimization step number
286    /// * `geometry` - Current molecular geometry
287    /// * `x_old` - Previous geometry coordinates
288    /// * `hessian` - Current approximate Hessian matrix
289    /// * `opt_state` - Optimization state with history
290    /// * `config` - Calculation configuration
291    ///
292    /// # Examples
293    ///
294    /// ```
295    /// use omecp::checkpoint::Checkpoint;
296    /// use omecp::geometry::Geometry;
297    /// use omecp::optimizer::OptimizationState;
298    /// use omecp::config::Config;
299    /// use nalgebra::{DMatrix, DVector};
300    ///
301    /// let elements = vec!["H".to_string()];
302    /// let coords = vec![0.0, 0.0, 0.0];
303    /// let geometry = Geometry::new(elements, coords);
304    /// let x_old = vec![0.0, 0.0, 0.0];
305    /// let hessian = DMatrix::identity(3, 3);
306    /// let opt_state = OptimizationState::new(5);  // max_history = 5
307    /// let config = Config::default();
308    ///
309    /// let checkpoint = Checkpoint::new(
310    ///     5,                          // Step 5
311    ///     &geometry,                  // Current geometry
312    ///     &DVector::from_vec(x_old),  // Previous coords
313    ///     &hessian,                   // Hessian matrix
314    ///     &opt_state,                 // Optimization state
315    ///     &config,                    // Configuration
316    /// );
317    /// ```
318    pub fn new(
319        step: usize,
320        geometry: &Geometry,
321        x_old: &DVector<f64>,
322        hessian: &DMatrix<f64>,
323        opt_state: &OptimizationState,
324        config: &Config,
325    ) -> Self {
326        // Version 3: Coordinates stored in Angstrom (unit standardization)
327        // This matches the internal coordinate storage convention where
328        // Geometry.coords are in Angstrom throughout the codebase.
329        //
330        // Validates: Requirement 1.5 - checkpoint saves coordinates in Angstrom
331        Self {
332            version: 3, // Version 3: coordinates in Angstrom (unit standardization)
333            step,
334            geometry: geometry.into(),
335            x_old: x_old.data.as_vec().clone(),
336            hessian: hessian
337                .row_iter()
338                .map(|row| row.iter().cloned().collect())
339                .collect(),
340            opt_state: opt_state.into(),
341            config: config.clone(),
342        }
343    }
344
345    /// Save checkpoint to a JSON file.
346    ///
347    /// # Arguments
348    ///
349    /// * `path` - Path where checkpoint will be saved
350    ///
351    /// # Errors
352    ///
353    /// Returns an error if:
354    /// - The file cannot be written
355    /// - Serialization fails
356    ///
357    /// # Examples
358    ///
359    /// ```
360    /// use omecp::checkpoint::Checkpoint;
361    /// use std::path::Path;
362    /// use omecp::geometry::Geometry;
363    /// use omecp::optimizer::OptimizationState;
364    /// use omecp::config::Config;
365    /// use nalgebra::{DMatrix, DVector};
366    ///
367    /// fn main() -> Result<(), Box<dyn std::error::Error>> {
368    ///     let elements = vec!["H".to_string()];
369    ///     let coords = vec![0.0, 0.0, 0.0];
370    ///     let geometry = Geometry::new(elements, coords);
371    ///     let x_old = vec![0.0, 0.0, 0.0];
372    ///     let hessian = DMatrix::identity(3, 3);
373    ///     let opt_state = OptimizationState::new(5);  // max_history = 5
374    ///     let config = Config::default();
375    ///
376    ///     let checkpoint = Checkpoint::new(
377    ///         5,
378    ///         &geometry,
379    ///         &DVector::from_vec(x_old),
380    ///         &hessian,
381    ///         &opt_state,
382    ///         &config,
383    ///     );
384    ///
385    ///     checkpoint.save(Path::new("mecp.chk"))?;
386    ///     std::fs::remove_file("mecp.chk")?;
387    ///     Ok(())
388    /// }
389    /// ```
390    pub fn save(&self, path: &Path) -> Result<(), Box<dyn std::error::Error>> {
391        let json = serde_json::to_string_pretty(self)?;
392        fs::write(path, json)?;
393        Ok(())
394    }
395
396    /// Load checkpoint from a JSON file.
397    ///
398    /// # Arguments
399    ///
400    /// * `path` - Path to checkpoint file
401    ///
402    /// # Returns
403    ///
404    /// Returns a `CheckpointLoad` struct containing:
405    /// - `step`: Optimization step number
406    /// - `geometry`: Molecular geometry
407    /// - `x_old`: Previous geometry coordinates
408    /// - `hessian`: Hessian matrix
409    /// - `opt_state`: Optimization state
410    /// - `config`: Calculation configuration
411    ///
412    /// # Errors
413    ///
414    /// Returns an error if:
415    /// - The file cannot be read
416    /// - Deserialization fails
417    /// - File format is invalid
418    ///
419    /// # Examples
420    ///
421    /// ```
422    /// use omecp::checkpoint::{Checkpoint, CheckpointLoad};
423    /// use std::path::Path;
424    /// use omecp::geometry::Geometry;
425    /// use omecp::optimizer::OptimizationState;
426    /// use omecp::config::Config;
427    /// use nalgebra::{DMatrix, DVector};
428    ///
429    /// fn main() -> Result<(), Box<dyn std::error::Error>> {
430    ///     let elements = vec!["H".to_string()];
431    ///     let coords = vec![0.0, 0.0, 0.0];
432    ///     let geometry = Geometry::new(elements, coords);
433    ///     let x_old = vec![0.0, 0.0, 0.0];
434    ///     let hessian = DMatrix::identity(3, 3);
435    ///     let opt_state = OptimizationState::new(5);  // max_history = 5
436    ///     let config = Config::default();
437    ///
438    ///     let checkpoint = Checkpoint::new(
439    ///         5,
440    ///         &geometry,
441    ///         &DVector::from_vec(x_old),
442    ///         &hessian,
443    ///         &opt_state,
444    ///         &config,
445    ///     );
446    ///
447    ///     checkpoint.save(Path::new("mecp.chk"))?;
448    ///
449    ///     let loaded: CheckpointLoad = Checkpoint::load(Path::new("mecp.chk"))?;
450    ///
451    ///     std::fs::remove_file("mecp.chk")?;
452    ///     Ok(())
453    /// }
454    /// ```
455    pub fn load(path: &Path) -> Result<CheckpointLoad, Box<dyn std::error::Error>> {
456        use crate::geometry::bohr_to_angstrom;
457        
458        let content = fs::read_to_string(path)?;
459        let checkpoint: Checkpoint = serde_json::from_str(&content)?;
460
461        let mut geometry = Geometry::from(checkpoint.geometry);
462        let mut x_old = DVector::from_vec(checkpoint.x_old);
463        let mut opt_state = OptimizationState::from(checkpoint.opt_state);
464
465        // Handle backward compatibility for different checkpoint versions
466        // Current internal storage: coordinates in Angstrom (unit standardization)
467        //
468        // Validates: Requirement 1.5 - backward compatibility for Bohr checkpoints
469        match checkpoint.version {
470            1 => {
471                // Version 1: coordinates already in Angstrom - no conversion needed
472                // This is the original format and matches our current internal storage
473                println!("INFO: Loading version 1 checkpoint (Angstrom coordinates)");
474            }
475            2 => {
476                // Version 2: coordinates in Bohr - convert to Angstrom
477                // This was an intermediate format that stored coordinates in Bohr
478                println!("WARNING: Loading version 2 checkpoint with Bohr coordinates");
479                println!("         Converting to Angstrom for unit standardization");
480                
481                geometry.coords = bohr_to_angstrom(&geometry.coords);
482                x_old = bohr_to_angstrom(&x_old);
483
484                // Convert geometry history in optimization state
485                for geom in &mut opt_state.geom_history {
486                    *geom = bohr_to_angstrom(geom);
487                }
488            }
489            3 => {
490                // Version 3: coordinates in Angstrom (current format) - no conversion needed
491                // This is the unit standardization format
492            }
493            _ => {
494                // Unknown version - attempt to detect units by magnitude
495                // Typical molecular coordinates in Angstrom: 0.5 - 5 Å
496                // Typical molecular coordinates in Bohr: 1 - 10 a₀
497                // If max coordinate > 10, likely in Bohr
498                let max_coord = geometry.coords.iter().map(|x| x.abs()).fold(0.0_f64, f64::max);
499                
500                if max_coord > 10.0 {
501                    println!("WARNING: Unknown checkpoint version {}, coordinates appear to be in Bohr", checkpoint.version);
502                    println!("         (max coordinate magnitude: {:.2})", max_coord);
503                    println!("         Converting to Angstrom for unit standardization");
504                    
505                    geometry.coords = bohr_to_angstrom(&geometry.coords);
506                    x_old = bohr_to_angstrom(&x_old);
507
508                    for geom in &mut opt_state.geom_history {
509                        *geom = bohr_to_angstrom(geom);
510                    }
511                } else {
512                    println!("INFO: Unknown checkpoint version {}, assuming Angstrom coordinates", checkpoint.version);
513                }
514            }
515        }
516
517        let nrows = checkpoint.hessian.len();
518        let ncols = if nrows > 0 {
519            checkpoint.hessian[0].len()
520        } else {
521            0
522        };
523        let hess_flat: Vec<f64> = checkpoint.hessian.into_iter().flatten().collect();
524        let hessian = if nrows > 0 && ncols > 0 {
525            DMatrix::from_row_slice(nrows, ncols, &hess_flat)
526        } else {
527            DMatrix::from_row_slice(0, 0, &[])
528        };
529
530        Ok(CheckpointLoad {
531            step: checkpoint.step,
532            geometry,
533            x_old,
534            hessian,
535            opt_state,
536            config: checkpoint.config,
537        })
538    }
539}