omecp/checkpoint.rs
1//! Checkpoint system for saving and restarting MECP calculations.
2//!
3//! This module provides functionality to save the complete state of a MECP
4//! optimization calculation and restart from that state later. This is essential
5//! for:
6//!
7//! - **Long calculations**: Save progress periodically
8//! - **Interrupted jobs**: Restart from the last checkpoint
9//! - **Parameter testing**: Resume with different settings
10//! - **Convergence recovery**: Restart from near-converged geometries
11//!
12//! # Checkpoint Format
13//!
14//! Checkpoints are stored as JSON files containing:
15//!
16//! - **Step number**: Current optimization step
17//! - **Geometry**: Current molecular geometry
18//! - **Optimization history**: Geometries, gradients, Hessians (for DIIS)
19//! - **Hessian matrix**: Current approximate Hessian
20//! - **Configuration**: Complete calculation settings
21//!
22//! # Serialization Strategy
23//!
24//! The checkpoint system uses serde for serialization. Since some types
25//! (DVector, DMatrix) from nalgebra don't directly serialize, wrapper types
26//! are used:
27//!
28//! - `SerializableGeometry`: Converts DVector coords to `Vec<f64>`
29//! - `SerializableOptimizationState`: Converts collections of DVector/DMatrix to Vec
30//!
31//! # Usage
32//!
33//! Checkpoints are automatically created during optimization but can also be
34//! used manually:
35//!
36//! ```no_run
37//! use omecp::checkpoint::{Checkpoint, CheckpointLoad};
38//! use omecp::geometry::Geometry;
39//! use omecp::optimizer::OptimizationState;
40//! use omecp::config::Config;
41//! use nalgebra::{DMatrix, DVector};
42//! use std::path::Path;
43//!
44//! fn main() -> Result<(), Box<dyn std::error::Error>> {
45//! // Create test data
46//! let elements = vec!["H".to_string()];
47//! let coords = vec![0.0, 0.0, 0.0];
48//! let geometry = Geometry::new(elements, coords);
49//! let x_old = DVector::from_vec(vec![0.0, 0.0, 0.0]);
50//! let hessian = DMatrix::identity(3, 3);
51//! let opt_state = OptimizationState::new(5);
52//! let config = Config::default();
53//!
54//! // Save checkpoint
55//! let checkpoint = Checkpoint::new(1, &geometry, &x_old, &hessian, &opt_state, &config);
56//! checkpoint.save(Path::new("mecp.chk"))?;
57//!
58//! // Load checkpoint
59//! let loaded: CheckpointLoad = Checkpoint::load(Path::new("mecp.chk"))?;
60//! println!("Loaded step: {}", loaded.step);
61//!
62//! std::fs::remove_file("mecp.chk")?;
63//! Ok(())
64//! }
65//! ```
66//!
67//! # File Location
68//!
69//! Checkpoint files are typically saved as `mecp.chk` or a user-specified name
70//! in the `running_dir/` directory for easy management.
71
72//! Checkpoint system for restarting MECP optimizations.
73//!
74//! This module provides functionality to save and restore optimization state
75//! including geometry, gradients, Hessian, and history for recovery.
76
77use crate::config::Config;
78use crate::geometry::Geometry;
79use crate::optimizer::OptimizationState;
80use nalgebra::{DMatrix, DVector};
81use serde::{Deserialize, Serialize};
82use std::fs;
83use std::path::Path;
84
85/// Default checkpoint version for backward compatibility.
86///
87/// Version history:
88/// - Version 1: Legacy format with coordinates in Angstrom (original)
89/// - Version 2: Legacy format with coordinates in Bohr (intermediate)
90/// - Version 3: Current format with coordinates in Angstrom (unit standardization)
91///
92/// The default is version 1 for backward compatibility when loading old checkpoints
93/// that don't have a version field.
94fn default_version() -> u32 {
95 1
96}
97
98// Serializable versions of structs containing nalgebra types
99
100/// Serializable wrapper for Geometry.
101///
102/// Since `Geometry` contains a `DVector<f64>` which doesn't directly serialize,
103/// this wrapper converts the vector to a plain `Vec<f64>` for JSON serialization.
104#[derive(Serialize, Deserialize)]
105pub struct SerializableGeometry {
106 /// Chemical element symbols
107 elements: Vec<String>,
108 /// Flattened coordinates as `Vec<f64>`
109 coords: Vec<f64>,
110 /// Number of atoms
111 num_atoms: usize,
112}
113
114impl From<&Geometry> for SerializableGeometry {
115 fn from(geom: &Geometry) -> Self {
116 Self {
117 elements: geom.elements.clone(),
118 coords: geom.coords.data.as_vec().clone(),
119 num_atoms: geom.num_atoms,
120 }
121 }
122}
123
124impl From<SerializableGeometry> for Geometry {
125 fn from(ser_geom: SerializableGeometry) -> Self {
126 Geometry::new(ser_geom.elements, ser_geom.coords)
127 }
128}
129
130/// Serializable wrapper for OptimizationState.
131///
132/// Converts collections of nalgebra types (DVector, DMatrix) to plain Vec
133/// types for JSON serialization. Preserves the optimization history needed
134/// for DIIS convergence acceleration.
135#[derive(Serialize, Deserialize)]
136pub struct SerializableOptimizationState {
137 /// Lagrange multipliers for constraints
138 lambdas: Vec<f64>,
139 /// Lagrange multiplier for energy difference constraint (FixDE mode)
140 lambda_de: Option<f64>,
141 /// Current constraint violations for extended gradient
142 constraint_violations: Vec<f64>,
143 /// History of geometries (for DIIS)
144 geom_history: Vec<Vec<f64>>,
145 /// History of gradients (for DIIS)
146 grad_history: Vec<Vec<f64>>,
147 /// History of Hessian matrices (for BFGS updates)
148 hess_history: Vec<Vec<Vec<f64>>>,
149 /// Maximum history size to maintain
150 max_history: usize,
151}
152
153impl From<&OptimizationState> for SerializableOptimizationState {
154 fn from(opt_state: &OptimizationState) -> Self {
155 Self {
156 lambdas: opt_state.lambdas.clone(),
157 lambda_de: opt_state.lambda_de,
158 constraint_violations: opt_state.constraint_violations.data.as_vec().clone(),
159 geom_history: opt_state
160 .geom_history
161 .iter()
162 .map(|v| v.data.as_vec().clone())
163 .collect(),
164 grad_history: opt_state
165 .grad_history
166 .iter()
167 .map(|v| v.data.as_vec().clone())
168 .collect(),
169 hess_history: opt_state
170 .hess_history
171 .iter()
172 .map(|m| {
173 m.row_iter()
174 .map(|row| row.iter().cloned().collect())
175 .collect()
176 })
177 .collect(),
178 max_history: opt_state.max_history,
179 }
180 }
181}
182
183impl From<SerializableOptimizationState> for OptimizationState {
184 fn from(ser_opt_state: SerializableOptimizationState) -> Self {
185 // Use max_history from serialized state to preserve the original configuration
186 let mut opt_state = OptimizationState::new(ser_opt_state.max_history);
187 opt_state.lambdas = ser_opt_state.lambdas;
188 opt_state.lambda_de = ser_opt_state.lambda_de;
189 opt_state.constraint_violations = DVector::from_vec(ser_opt_state.constraint_violations);
190 // max_history is already set in new()
191
192 for coords in ser_opt_state.geom_history {
193 opt_state.geom_history.push_back(DVector::from_vec(coords));
194 }
195 for grad in ser_opt_state.grad_history {
196 opt_state.grad_history.push_back(DVector::from_vec(grad));
197 }
198 for hess in ser_opt_state.hess_history {
199 let nrows = hess.len();
200 let ncols = hess[0].len();
201 let flat: Vec<f64> = hess.into_iter().flatten().collect();
202 opt_state
203 .hess_history
204 .push_back(DMatrix::from_row_slice(nrows, ncols, &flat));
205 }
206
207 opt_state
208 }
209}
210
211/// Checkpoint structure for saving/restoring MECP calculations.
212///
213/// A checkpoint contains all information needed to continue an optimization
214/// from a specific step, including the current geometry, optimization history,
215/// and calculation configuration.
216///
217/// # Unit Conventions
218///
219/// As of version 3 (unit standardization), all coordinates are stored in Angstrom (Å):
220/// - `geometry.coords`: Molecular coordinates in Angstrom
221/// - `x_old`: Previous geometry coordinates in Angstrom
222/// - `opt_state.geom_history`: Geometry history in Angstrom
223///
224/// Gradients are stored in Hartree/Angstrom (Ha/Å):
225/// - `opt_state.grad_history`: Gradient history in Ha/Å
226///
227/// # Version History
228///
229/// - Version 1: Original format with coordinates in Angstrom
230/// - Version 2: Intermediate format with coordinates in Bohr (deprecated)
231/// - Version 3: Current format with coordinates in Angstrom (unit standardization)
232///
233/// **Validates: Requirement 1.5**
234#[derive(Serialize, Deserialize)]
235pub struct Checkpoint {
236 /// Checkpoint format version.
237 ///
238 /// - Version 1: Coordinates in Angstrom (legacy)
239 /// - Version 2: Coordinates in Bohr (legacy, deprecated)
240 /// - Version 3: Coordinates in Angstrom (current, unit standardization)
241 #[serde(default = "default_version")]
242 pub version: u32,
243 /// Current optimization step number
244 pub step: usize,
245 /// Current molecular geometry
246 pub geometry: SerializableGeometry,
247 /// Previous geometry (for gradient computation)
248 pub x_old: Vec<f64>,
249 /// Current approximate Hessian matrix
250 pub hessian: Vec<Vec<f64>>,
251 /// Optimization state with history
252 pub opt_state: SerializableOptimizationState,
253 /// Complete calculation configuration
254 pub config: Config,
255}
256
257/// Loaded checkpoint contents returned by `Checkpoint::load`.
258///
259/// This struct contains the fully reconstructed runtime types converted from the
260/// serialized `Checkpoint` representation. It includes everything required to
261/// resume an optimization: the step counter, the molecular geometry, previous
262/// coordinates used for gradient differences, the current approximate Hessian,
263/// the optimization state (with DIIS/history structures), and the calculation
264/// configuration.
265pub struct CheckpointLoad {
266 /// Optimization step number at the time of saving.
267 pub step: usize,
268 /// Molecular geometry reconstructed from the checkpoint.
269 pub geometry: Geometry,
270 /// Previous geometry coordinates (flattened) used for finite differences.
271 pub x_old: DVector<f64>,
272 /// Hessian matrix (approximate) reconstructed as a nalgebra `DMatrix`.
273 pub hessian: DMatrix<f64>,
274 /// Optimization state including history, lambdas, and other runtime data.
275 pub opt_state: OptimizationState,
276 /// Calculation configuration used when the checkpoint was created.
277 pub config: Config,
278}
279
280impl Checkpoint {
281 /// Create a new checkpoint from current optimization state.
282 ///
283 /// # Arguments
284 ///
285 /// * `step` - Current optimization step number
286 /// * `geometry` - Current molecular geometry
287 /// * `x_old` - Previous geometry coordinates
288 /// * `hessian` - Current approximate Hessian matrix
289 /// * `opt_state` - Optimization state with history
290 /// * `config` - Calculation configuration
291 ///
292 /// # Examples
293 ///
294 /// ```
295 /// use omecp::checkpoint::Checkpoint;
296 /// use omecp::geometry::Geometry;
297 /// use omecp::optimizer::OptimizationState;
298 /// use omecp::config::Config;
299 /// use nalgebra::{DMatrix, DVector};
300 ///
301 /// let elements = vec!["H".to_string()];
302 /// let coords = vec![0.0, 0.0, 0.0];
303 /// let geometry = Geometry::new(elements, coords);
304 /// let x_old = vec![0.0, 0.0, 0.0];
305 /// let hessian = DMatrix::identity(3, 3);
306 /// let opt_state = OptimizationState::new(5); // max_history = 5
307 /// let config = Config::default();
308 ///
309 /// let checkpoint = Checkpoint::new(
310 /// 5, // Step 5
311 /// &geometry, // Current geometry
312 /// &DVector::from_vec(x_old), // Previous coords
313 /// &hessian, // Hessian matrix
314 /// &opt_state, // Optimization state
315 /// &config, // Configuration
316 /// );
317 /// ```
318 pub fn new(
319 step: usize,
320 geometry: &Geometry,
321 x_old: &DVector<f64>,
322 hessian: &DMatrix<f64>,
323 opt_state: &OptimizationState,
324 config: &Config,
325 ) -> Self {
326 // Version 3: Coordinates stored in Angstrom (unit standardization)
327 // This matches the internal coordinate storage convention where
328 // Geometry.coords are in Angstrom throughout the codebase.
329 //
330 // Validates: Requirement 1.5 - checkpoint saves coordinates in Angstrom
331 Self {
332 version: 3, // Version 3: coordinates in Angstrom (unit standardization)
333 step,
334 geometry: geometry.into(),
335 x_old: x_old.data.as_vec().clone(),
336 hessian: hessian
337 .row_iter()
338 .map(|row| row.iter().cloned().collect())
339 .collect(),
340 opt_state: opt_state.into(),
341 config: config.clone(),
342 }
343 }
344
345 /// Save checkpoint to a JSON file.
346 ///
347 /// # Arguments
348 ///
349 /// * `path` - Path where checkpoint will be saved
350 ///
351 /// # Errors
352 ///
353 /// Returns an error if:
354 /// - The file cannot be written
355 /// - Serialization fails
356 ///
357 /// # Examples
358 ///
359 /// ```
360 /// use omecp::checkpoint::Checkpoint;
361 /// use std::path::Path;
362 /// use omecp::geometry::Geometry;
363 /// use omecp::optimizer::OptimizationState;
364 /// use omecp::config::Config;
365 /// use nalgebra::{DMatrix, DVector};
366 ///
367 /// fn main() -> Result<(), Box<dyn std::error::Error>> {
368 /// let elements = vec!["H".to_string()];
369 /// let coords = vec![0.0, 0.0, 0.0];
370 /// let geometry = Geometry::new(elements, coords);
371 /// let x_old = vec![0.0, 0.0, 0.0];
372 /// let hessian = DMatrix::identity(3, 3);
373 /// let opt_state = OptimizationState::new(5); // max_history = 5
374 /// let config = Config::default();
375 ///
376 /// let checkpoint = Checkpoint::new(
377 /// 5,
378 /// &geometry,
379 /// &DVector::from_vec(x_old),
380 /// &hessian,
381 /// &opt_state,
382 /// &config,
383 /// );
384 ///
385 /// checkpoint.save(Path::new("mecp.chk"))?;
386 /// std::fs::remove_file("mecp.chk")?;
387 /// Ok(())
388 /// }
389 /// ```
390 pub fn save(&self, path: &Path) -> Result<(), Box<dyn std::error::Error>> {
391 let json = serde_json::to_string_pretty(self)?;
392 fs::write(path, json)?;
393 Ok(())
394 }
395
396 /// Load checkpoint from a JSON file.
397 ///
398 /// # Arguments
399 ///
400 /// * `path` - Path to checkpoint file
401 ///
402 /// # Returns
403 ///
404 /// Returns a `CheckpointLoad` struct containing:
405 /// - `step`: Optimization step number
406 /// - `geometry`: Molecular geometry
407 /// - `x_old`: Previous geometry coordinates
408 /// - `hessian`: Hessian matrix
409 /// - `opt_state`: Optimization state
410 /// - `config`: Calculation configuration
411 ///
412 /// # Errors
413 ///
414 /// Returns an error if:
415 /// - The file cannot be read
416 /// - Deserialization fails
417 /// - File format is invalid
418 ///
419 /// # Examples
420 ///
421 /// ```
422 /// use omecp::checkpoint::{Checkpoint, CheckpointLoad};
423 /// use std::path::Path;
424 /// use omecp::geometry::Geometry;
425 /// use omecp::optimizer::OptimizationState;
426 /// use omecp::config::Config;
427 /// use nalgebra::{DMatrix, DVector};
428 ///
429 /// fn main() -> Result<(), Box<dyn std::error::Error>> {
430 /// let elements = vec!["H".to_string()];
431 /// let coords = vec![0.0, 0.0, 0.0];
432 /// let geometry = Geometry::new(elements, coords);
433 /// let x_old = vec![0.0, 0.0, 0.0];
434 /// let hessian = DMatrix::identity(3, 3);
435 /// let opt_state = OptimizationState::new(5); // max_history = 5
436 /// let config = Config::default();
437 ///
438 /// let checkpoint = Checkpoint::new(
439 /// 5,
440 /// &geometry,
441 /// &DVector::from_vec(x_old),
442 /// &hessian,
443 /// &opt_state,
444 /// &config,
445 /// );
446 ///
447 /// checkpoint.save(Path::new("mecp.chk"))?;
448 ///
449 /// let loaded: CheckpointLoad = Checkpoint::load(Path::new("mecp.chk"))?;
450 ///
451 /// std::fs::remove_file("mecp.chk")?;
452 /// Ok(())
453 /// }
454 /// ```
455 pub fn load(path: &Path) -> Result<CheckpointLoad, Box<dyn std::error::Error>> {
456 use crate::geometry::bohr_to_angstrom;
457
458 let content = fs::read_to_string(path)?;
459 let checkpoint: Checkpoint = serde_json::from_str(&content)?;
460
461 let mut geometry = Geometry::from(checkpoint.geometry);
462 let mut x_old = DVector::from_vec(checkpoint.x_old);
463 let mut opt_state = OptimizationState::from(checkpoint.opt_state);
464
465 // Handle backward compatibility for different checkpoint versions
466 // Current internal storage: coordinates in Angstrom (unit standardization)
467 //
468 // Validates: Requirement 1.5 - backward compatibility for Bohr checkpoints
469 match checkpoint.version {
470 1 => {
471 // Version 1: coordinates already in Angstrom - no conversion needed
472 // This is the original format and matches our current internal storage
473 println!("INFO: Loading version 1 checkpoint (Angstrom coordinates)");
474 }
475 2 => {
476 // Version 2: coordinates in Bohr - convert to Angstrom
477 // This was an intermediate format that stored coordinates in Bohr
478 println!("WARNING: Loading version 2 checkpoint with Bohr coordinates");
479 println!(" Converting to Angstrom for unit standardization");
480
481 geometry.coords = bohr_to_angstrom(&geometry.coords);
482 x_old = bohr_to_angstrom(&x_old);
483
484 // Convert geometry history in optimization state
485 for geom in &mut opt_state.geom_history {
486 *geom = bohr_to_angstrom(geom);
487 }
488 }
489 3 => {
490 // Version 3: coordinates in Angstrom (current format) - no conversion needed
491 // This is the unit standardization format
492 }
493 _ => {
494 // Unknown version - attempt to detect units by magnitude
495 // Typical molecular coordinates in Angstrom: 0.5 - 5 Å
496 // Typical molecular coordinates in Bohr: 1 - 10 a₀
497 // If max coordinate > 10, likely in Bohr
498 let max_coord = geometry.coords.iter().map(|x| x.abs()).fold(0.0_f64, f64::max);
499
500 if max_coord > 10.0 {
501 println!("WARNING: Unknown checkpoint version {}, coordinates appear to be in Bohr", checkpoint.version);
502 println!(" (max coordinate magnitude: {:.2})", max_coord);
503 println!(" Converting to Angstrom for unit standardization");
504
505 geometry.coords = bohr_to_angstrom(&geometry.coords);
506 x_old = bohr_to_angstrom(&x_old);
507
508 for geom in &mut opt_state.geom_history {
509 *geom = bohr_to_angstrom(geom);
510 }
511 } else {
512 println!("INFO: Unknown checkpoint version {}, assuming Angstrom coordinates", checkpoint.version);
513 }
514 }
515 }
516
517 let nrows = checkpoint.hessian.len();
518 let ncols = if nrows > 0 {
519 checkpoint.hessian[0].len()
520 } else {
521 0
522 };
523 let hess_flat: Vec<f64> = checkpoint.hessian.into_iter().flatten().collect();
524 let hessian = if nrows > 0 && ncols > 0 {
525 DMatrix::from_row_slice(nrows, ncols, &hess_flat)
526 } else {
527 DMatrix::from_row_slice(0, 0, &[])
528 };
529
530 Ok(CheckpointLoad {
531 step: checkpoint.step,
532 geometry,
533 x_old,
534 hessian,
535 opt_state,
536 config: checkpoint.config,
537 })
538 }
539}