Skip to main content
← OpenMECP Documentation

omecp/
qm_interface.rs

1//! Quantum chemistry program interfaces for MECP calculations.
2//!
3//! This module provides a unified interface for running calculations with different
4//! quantum chemistry programs. It abstracts away the differences between programs
5//! (Gaussian, ORCA, Bagel, XTB, custom interfaces) and provides a consistent API
6//! for:
7//!
8//! - Writing input files
9//! - Executing calculations
10//! - Parsing output files
11//! - Extracting energies, forces, and geometries
12//!
13//! # Supported Programs
14//!
15//! - **Gaussian**: Full feature support with checkpoint files, TD-DFT, MP2, etc.
16//! - **ORCA**: Energy and gradient files with GBW checkpoints
17//! - **Bagel**: CASSCF and MRCI calculations with JSON model files
18//! - **XTB**: Fast semi-empirical calculations using GFN2-xTB
19//! - **Custom**: JSON-configurable interface for any program
20//!
21//! # Interface Design
22//!
23//! The [`QMInterface`] trait defines the contract that all QM programs must implement.
24//! Each program has its own implementation that handles:
25//! - Program-specific input file format
26//! - Program execution commands and flags
27//! - Output parsing and data extraction
28//! - Error handling specific to the program
29//!
30//! # Usage Pattern
31//!
32//! ```rust
33//! use omecp::qm_interface::{QMInterface, GaussianInterface};
34//!
35//! let gaussian = GaussianInterface::new("g16".to_string(), false);
36//! gaussian.write_input(&geometry, &header, &tail, Path::new("input.gjf"))?;
37//! gaussian.run_calculation(Path::new("input.gjf"))?;
38//! let state = gaussian.read_output(Path::new("output.log"), 0)?;
39//! ```
40//!
41//! # Error Handling
42//!
43//! All operations return a [`QMError`] result that can be:
44//! - `IO`: File system errors (missing files, permission issues)
45//! - `Calculation`: QM program execution failures
46//! - `Parse`: Output parsing errors (malformed or unexpected output)
47//!
48//! # State Extraction
49//!
50//! Each call to `read_output` extracts a [`State`] containing:
51//! - Energy (in hartree)
52//! - Forces/gradients (in hartree/bohr)
53//! - Final geometry
54
55use crate::geometry::{bohr_to_angstrom, forces_bohr_to_angstrom, Geometry, State};
56use crate::io;
57use lazy_static::lazy_static;
58use nalgebra::DVector;
59use regex::Regex;
60
61use std::fs;
62use std::path::Path;
63use std::process::{Command, Stdio};
64use thiserror::Error;
65
66/// Error type for QM interface operations.
67///
68/// QM calculations can fail at three stages:
69/// 1. **I/O**: File operations (reading/writing input/output files)
70/// 2. **Calculation**: Program execution (segfaults, convergence failures, etc.)
71/// 3. **Parsing**: Output file interpretation (malformed data, unexpected format)
72#[derive(Error, Debug)]
73pub enum QMError {
74    /// File system or I/O operation failed
75    #[error("IO error: {0}")]
76    Io(#[from] std::io::Error),
77    /// Quantum chemistry program execution failed
78    #[error("QM calculation failed: {0}")]
79    Calculation(String),
80    /// Failed to parse program output
81    #[error("Parse error: {0}")]
82    Parse(String),
83}
84
85/// Type alias for QM operation results
86type Result<T> = std::result::Result<T, QMError>;
87
88/// Trait that defines the interface for all quantum chemistry programs.
89///
90/// This trait must be implemented by each QM program to provide a uniform
91/// interface for MECP calculations. It handles the complete lifecycle of
92/// a single-point calculation: input generation, execution, and output parsing.
93pub trait QMInterface {
94    /// Writes a calculation input file for the quantum chemistry program.
95    ///
96    /// # Arguments
97    ///
98    /// * `geom` - Molecular geometry with element types and coordinates
99    /// * `header` - Program-specific header section (methods, basis sets, options)
100    /// * `tail` - Additional keywords or sections specific to the calculation
101    /// * `path` - Output path for the input file
102    ///
103    /// # Returns
104    ///
105    /// Returns `Ok(())` on success, or a `QMError` if file writing fails.
106    fn write_input(&self, geom: &Geometry, header: &str, tail: &str, path: &Path) -> Result<()>;
107
108    /// Executes the quantum chemistry calculation.
109    ///
110    /// Runs the external QM program with the input file and waits for completion.
111    /// The program output is captured and checked for success status.
112    ///
113    /// # Arguments
114    ///
115    /// * `input_path` - Path to the input file to execute
116    ///
117    /// # Returns
118    ///
119    /// Returns `Ok(())` if the calculation succeeds, or a `QMError` if:
120    /// - The program cannot be executed (not found, permissions)
121    /// - The calculation fails (non-zero exit status)
122    /// - Runtime errors occur during execution
123    fn run_calculation(&self, input_path: &Path) -> Result<()>;
124
125    /// Parses the calculation output file to extract results.
126    ///
127    /// Reads and parses the program output file to extract the electronic state
128    /// properties needed for MECP optimization.
129    ///
130    /// # Arguments
131    ///
132    /// * `output_path` - Path to the output/log file to parse
133    /// * `state` - Electronic state index to extract (0 = ground state, >0 = excited state)
134    ///
135    /// # Returns
136    ///
137    /// Returns a `Result` containing:
138    /// - `Ok(State)` - Successfully parsed state with energy, forces, and geometry
139    /// - `Err(QMError::Parse)` - Failed to parse required data from output
140    /// - `Err(QMError::IO)` - Cannot read output file
141    ///
142    /// # State Selection
143    ///
144    /// The `state` parameter selects which electronic state to extract:
145    /// - `state = 0`: Ground state (S0 for singlet, S1 for triplet, etc.)
146    /// - `state = n`: n-th excited state (for TD-DFT calculations)
147    ///
148    /// Different programs interpret this differently:
149    /// - **Gaussian**: State index for TD-DFT excited states
150    /// - **ORCA**: State index for excited state calculations
151    /// - **Other programs**: May ignore this parameter
152    fn read_output(&self, output_path: &Path, original_geometry: &Geometry, state: usize) -> Result<State>;
153}
154
155/// Gaussian quantum chemistry program interface.
156///
157/// Provides support for Gaussian calculations including:
158/// - DFT, TD-DFT, MP2, and post-HF methods
159/// - Excited state calculations via TD-DFT
160/// - Checkpoint files for wavefunction continuity
161/// - Both energy-only and gradient calculations
162///
163/// # Examples
164///
165/// ```
166/// use omecp::qm_interface::GaussianInterface;
167/// use std::path::Path;
168///
169/// // Create interface with Gaussian 16
170/// let gaussian = GaussianInterface::new("g16".to_string(), false);
171///
172/// // Create interface with MP2 enabled
173/// let gaussian_mp2 = GaussianInterface::new("g16".to_string(), true);
174/// ```
175pub struct GaussianInterface {
176    /// Gaussian command to execute (e.g., "g16", "g09", "gview")
177    pub command: String,
178    /// Enable MP2 energy extraction from checkpoint archive
179    pub mp2: bool,
180}
181
182impl GaussianInterface {
183    /// Creates a new Gaussian interface.
184    ///
185    /// # Arguments
186    ///
187    /// * `command` - Gaussian executable command (e.g., "g16", "/path/to/g16")
188    /// * `mp2` - If true, extracts MP2 energy from checkpoint archive instead of SCF
189    ///
190    /// # Examples
191    ///
192    /// ```
193    /// use omecp::qm_interface::GaussianInterface;
194    ///
195    /// // Standard DFT calculation
196    /// let g16 = GaussianInterface::new("g16".to_string(), false);
197    ///
198    /// // MP2 calculation
199    /// let g16_mp2 = GaussianInterface::new("g16".to_string(), true);
200    /// ```
201    pub fn new(command: String, mp2: bool) -> Self {
202        Self { command, mp2 }
203    }
204}
205
206lazy_static! {
207    // Robust floating-point regex: handles 1.23, -0.032, 1.2e-4, .123, etc.
208    static ref FLOAT_RE: String = r"[-+]?(?:\d+\.\d*|\.\d+)(?:[eE][-+]?\d+)?".to_string();
209
210    // Excited state line: " Excited State   1: Singlet-A 3.9741 eV ..."
211    static ref EXCITED_RE: Regex = Regex::new(&format!(
212        r"^\s*Excited\s+State\s+(\d+)\s*:\s*.*?({0})\s+eV",
213        *FLOAT_RE
214    )).unwrap();
215
216    // CIS/TDA total energy: " Total Energy, E(CIS/TDA) = -2823.11144861"
217    static ref CIS_TOTAL_RE: Regex = Regex::new(&format!(
218        r"Total\s+Energy,\s+E\(CIS/TDA\)\s*=\s*({0})",
219        *FLOAT_RE
220    )).unwrap();
221
222    // Force line: " 1 5 -0.032351682 0.050284933 0.089439223"
223    static ref FORCE_RE: Regex = Regex::new(&format!(
224        r"^\s*\d+\s+\d+\s+({0})\s+({0})\s+({0})",
225        *FLOAT_RE
226    )).unwrap();
227
228    // Geometry line: " 1 5 0 -0.032351682 0.050284933 0.089439223"
229    static ref GEOM_RE: Regex = Regex::new(&format!(
230        r"^\s*\d+\s+(\d+)\s+\d+\s+({0})\s+({0})\s+({0})",
231        *FLOAT_RE
232    )).unwrap();
233}
234
235impl QMInterface for GaussianInterface {
236    fn write_input(&self, geom: &Geometry, header: &str, tail: &str, path: &Path) -> Result<()> {
237        let mut content = String::new();
238        content.push_str(header);
239        content.push('\n');
240
241        for i in 0..geom.num_atoms {
242            let coords = geom.get_atom_coords(i);
243            // Coordinates are already in Angstrom - write directly
244            content.push_str(&format!(
245                "{}  {:.8}  {:.8}  {:.8}\n",
246                geom.elements[i], coords[0], coords[1], coords[2]
247            ));
248        }
249
250        content.push('\n');
251        content.push_str(tail);
252        content.push('\n');
253
254        fs::write(path, content)?;
255        Ok(())
256    }
257
258    fn run_calculation(&self, input_path: &Path) -> Result<()> {
259        let output = Command::new(&self.command).arg(input_path).output()?;
260
261        if !output.status.success() {
262            return Err(QMError::Calculation(
263                String::from_utf8_lossy(&output.stderr).to_string(),
264            ));
265        }
266        Ok(())
267    }
268
269    fn read_output(&self, output_path: &Path, original_geometry: &Geometry, _state: usize) -> Result<State> {
270        let content = fs::read_to_string(output_path)
271            .map_err(QMError::Io)?;
272
273        let mut energy = 0.0;
274        let mut forces = Vec::new();
275        let mut geom_coords = Vec::new();
276        let mut in_forces = false;
277        let mut in_geom = false;
278        let mut archive_part = String::new();
279        for line in content.lines() {
280            let trimmed = line.trim();
281            // === 1. SCF Energy ===
282            if line.contains("SCF Done") {
283                if let Some(eq_pos) = line.find('=') {
284                    if let Some(val) = line[eq_pos + 1..].split_whitespace().next() {
285                        energy = val.parse().unwrap_or(0.0);
286                    }
287                }
288            }
289            // === 1b. Extrapolated Energy (ONIOM fallback) ===
290            else if line.contains("extrapolated energy") {
291                if let Some(eq_pos) = line.find('=') {
292                    if let Some(val) = line[eq_pos + 1..].split_whitespace().next() {
293                        energy = val.parse().unwrap_or(0.0);
294                    }
295                }
296            }
297            // === 2. TD-DFT / CIS / TDA Energy ===
298            else if line.contains("E(TD-HF/TD-DFT)") || line.contains("E(CIS/TDA)") {
299                // Only parse ground state energy for MECP compatibility
300                if line.contains("E(TD-HF/TD-DFT)") {
301                    if let Some(eq) = line.find('=') {
302                        if let Some(val) = line[eq + 1..].split_whitespace().next() {
303                            energy = val.parse().unwrap_or(0.0);
304                        }
305                    }
306                } else if let Some(caps) = CIS_TOTAL_RE.captures(line) {
307                    energy = caps[1].parse().unwrap_or(0.0);
308                }
309            }
310            // === 3. MP2 Archive Line ===
311            else if self.mp2 && trimmed.starts_with('\\') {
312                // Handle multi-line archive entries 
313                if !archive_part.is_empty() {
314                    archive_part.push('\n'); // Add newline for continuation
315                }
316                archive_part.push_str(trimmed);
317            }
318            // === 4. Forces Block ===
319            else if line.contains("Forces (Hartrees/Bohr)") {
320                in_forces = true;
321                forces.clear();
322            } else if line.contains("Cartesian Forces: Max") {
323                in_forces = false;
324            } else if in_forces {
325                if let Some(caps) = FORCE_RE.captures(line) {
326                    forces.push(caps[1].parse().unwrap_or(0.0));
327                    forces.push(caps[2].parse().unwrap_or(0.0));
328                    forces.push(caps[3].parse().unwrap_or(0.0));
329                }
330            }
331            // === 5. Geometry Block ===
332            else if line.contains("Input orientation") || line.contains("Standard orientation") {
333                in_geom = true;
334                geom_coords.clear();
335            } else if (line.contains("Distance matrix") || line.contains("Rotational constants"))
336                && in_geom
337            {
338                in_geom = false;
339            } else if in_geom {
340                if let Some(caps) = GEOM_RE.captures(line) {
341                    // CRITICAL: Do NOT rebuild elements to preserve atom order
342                    geom_coords.push(caps[2].parse().unwrap_or(0.0));
343                    geom_coords.push(caps[3].parse().unwrap_or(0.0));
344                    geom_coords.push(caps[4].parse().unwrap_or(0.0));
345                }
346            }
347        }
348        // === 6. MP2 Energy from Archive ===
349        if self.mp2 && !archive_part.is_empty() {
350            if let Some(mp2_pos) = archive_part.find("MP2=") {
351                let after = &archive_part[mp2_pos + 4..];
352                if let Some(end) = after.find('\\') {
353                    let mp2_str = after[..end].trim();
354                    if let Ok(mp2_e) = mp2_str.parse::<f64>() {
355                        energy = mp2_e;
356                    }
357                }
358            }
359        }
360        // === 7. Final Validation ===
361        let n_atoms = original_geometry.num_atoms;
362        if n_atoms == 0 {
363            return Err(QMError::Parse("No atoms found in geometry".into()));
364        }
365        if forces.len() != 3 * n_atoms {
366            return Err(QMError::Parse(format!(
367                "Expected {} force components, got {}",
368                3 * n_atoms,
369                forces.len()
370            )));
371        }
372        if geom_coords.len() != 3 * n_atoms {
373            return Err(QMError::Parse(format!(
374                "Expected {} coordinates, got {}",
375                3 * n_atoms,
376                geom_coords.len()
377            )));
378        }
379
380        let state = State {
381            energy,
382            // Convert forces from Ha/Bohr to Ha/Å for consistency with coordinates
383            forces: forces_bohr_to_angstrom(&DVector::from_vec(forces)),
384            geometry: Geometry::new(
385                original_geometry.elements.clone(), // Preserve original atom order
386                geom_coords,  // Coordinates in Angstrom (native Gaussian output)
387            ),
388        };
389
390        // Validate the state to ensure meaningful data
391        state.validate().map_err(|e| QMError::Parse(format!(
392            "Gaussian state validation failed: {}. Check that the calculation completed successfully and produced valid energy/gradient data.",
393            e
394        )))?;
395
396        Ok(state)
397    }
398}
399
400fn atomic_number_to_symbol(num: usize) -> String {
401    match num {
402        1 => "H",    // Hydrogen
403        2 => "He",   // Helium
404        3 => "Li",   // Lithium
405        4 => "Be",   // Beryllium
406        5 => "B",    // Boron
407        6 => "C",    // Carbon
408        7 => "N",    // Nitrogen
409        8 => "O",    // Oxygen
410        9 => "F",    // Fluorine
411        10 => "Ne",  // Neon
412        11 => "Na",  // Sodium
413        12 => "Mg",  // Magnesium
414        13 => "Al",  // Aluminum
415        14 => "Si",  // Silicon
416        15 => "P",   // Phosphorus
417        16 => "S",   // Sulfur
418        17 => "Cl",  // Chlorine
419        18 => "Ar",  // Argon
420        19 => "K",   // Potassium
421        20 => "Ca",  // Calcium
422        21 => "Sc",  // Scandium
423        22 => "Ti",  // Titanium
424        23 => "V",   // Vanadium
425        24 => "Cr",  // Chromium
426        25 => "Mn",  // Manganese
427        26 => "Fe",  // Iron
428        27 => "Co",  // Cobalt
429        28 => "Ni",  // Nickel
430        29 => "Cu",  // Copper
431        30 => "Zn",  // Zinc
432        31 => "Ga",  // Gallium
433        32 => "Ge",  // Germanium
434        33 => "As",  // Arsenic
435        34 => "Se",  // Selenium
436        35 => "Br",  // Bromine
437        36 => "Kr",  // Krypton
438        37 => "Rb",  // Rubidium
439        38 => "Sr",  // Strontium
440        39 => "Y",   // Yttrium
441        40 => "Zr",  // Zirconium
442        41 => "Nb",  // Niobium
443        42 => "Mo",  // Molybdenum
444        43 => "Tc",  // Technetium
445        44 => "Ru",  // Ruthenium
446        45 => "Rh",  // Rhodium
447        46 => "Pd",  // Palladium
448        47 => "Ag",  // Silver
449        48 => "Cd",  // Cadmium
450        49 => "In",  // Indium
451        50 => "Sn",  // Tin
452        51 => "Sb",  // Antimony
453        52 => "Te",  // Tellurium
454        53 => "I",   // Iodine
455        54 => "Xe",  // Xenon
456        55 => "Cs",  // Cesium
457        56 => "Ba",  // Barium
458        57 => "La",  // Lanthanum
459        58 => "Ce",  // Cerium
460        59 => "Pr",  // Praseodymium
461        60 => "Nd",  // Neodymium
462        61 => "Pm",  // Promethium
463        62 => "Sm",  // Samarium
464        63 => "Eu",  // Europium
465        64 => "Gd",  // Gadolinium
466        65 => "Tb",  // Terbium
467        66 => "Dy",  // Dysprosium
468        67 => "Ho",  // Holmium
469        68 => "Er",  // Erbium
470        69 => "Tm",  // Thulium
471        70 => "Yb",  // Ytterbium
472        71 => "Lu",  // Lutetium
473        72 => "Hf",  // Hafnium
474        73 => "Ta",  // Tantalum
475        74 => "W",   // Tungsten
476        75 => "Re",  // Rhenium
477        76 => "Os",  // Osmium
478        77 => "Ir",  // Iridium
479        78 => "Pt",  // Platinum
480        79 => "Au",  // Gold
481        80 => "Hg",  // Mercury
482        81 => "Tl",  // Thallium
483        82 => "Pb",  // Lead
484        83 => "Bi",  // Bismuth
485        84 => "Po",  // Polonium
486        85 => "At",  // Astatine
487        86 => "Rn",  // Radon
488        87 => "Fr",  // Francium
489        88 => "Ra",  // Radium
490        89 => "Ac",  // Actinium
491        90 => "Th",  // Thorium
492        91 => "Pa",  // Protactinium
493        92 => "U",   // Uranium
494        93 => "Np",  // Neptunium
495        94 => "Pu",  // Plutonium
496        95 => "Am",  // Americium
497        96 => "Cm",  // Curium
498        97 => "Bk",  // Berkelium
499        98 => "Cf",  // Californium
500        99 => "Es",  // Einsteinium
501        100 => "Fm", // Fermium
502        101 => "Md", // Mendelevium
503        102 => "No", // Nobelium
504        103 => "Lr", // Lawrencium
505        104 => "Rf", // Rutherfordium
506        105 => "Db", // Dubnium
507        106 => "Sg", // Seaborgium
508        107 => "Bh", // Bohrium
509        108 => "Hs", // Hassium
510        109 => "Mt", // Meitnerium
511        110 => "Ds", // Darmstadtium
512        111 => "Rg", // Roentgenium
513        112 => "Cn", // Copernicium
514        113 => "Nh", // Nihonium
515        114 => "Fl", // Flerovium
516        115 => "Mc", // Moscovium
517        116 => "Lv", // Livermorium
518        117 => "Ts", // Tennessine
519        118 => "Og", // Oganesson
520        _ => "X",    // Unknown or invalid
521    }
522    .to_string()
523}
524
525/// ORCA quantum chemistry program interface.
526///
527/// Provides support for ORCA calculations including:
528/// - DFT, TD-DFT, and CASSCF methods
529/// - Energy and gradient parsing from .engrad files
530/// - Checkpoint files (.gbw) for wavefunction continuity
531///
532/// # Examples
533///
534/// ```
535/// use omecp::qm_interface::OrcaInterface;
536///
537/// // Create interface with ORCA
538/// let orca = OrcaInterface::new("orca".to_string());
539/// ```
540pub struct OrcaInterface {
541    /// ORCA executable command (e.g., "orca", "/path/to/orca")
542    pub command: String,
543}
544
545impl OrcaInterface {
546    /// Creates a new ORCA interface.
547    ///
548    /// # Arguments
549    ///
550    /// * `command` - ORCA executable command (e.g., "orca", "/path/to/orca")
551    ///
552    /// # Examples
553    ///
554    /// ```
555    /// use omecp::qm_interface::OrcaInterface;
556    ///
557    /// let orca = OrcaInterface::new("orca".to_string());
558    /// ```
559    pub fn new(command: String) -> Self {
560        Self { command }
561    }
562}
563
564impl QMInterface for OrcaInterface {
565    fn write_input(&self, geom: &Geometry, header: &str, tail: &str, path: &Path) -> Result<()> {
566        let mut content = String::new();
567        content.push_str(header);
568        content.push('\n');
569
570        for i in 0..geom.num_atoms {
571            let coords = geom.get_atom_coords(i);
572            // Coordinates are already in Angstrom - write directly
573            content.push_str(&format!(
574                "{}  {:.8}  {:.8}  {:.8}\n",
575                geom.elements[i], coords[0], coords[1], coords[2]
576            ));
577        }
578
579        content.push_str("*\n");
580        content.push_str(tail);
581        content.push('\n');
582
583        fs::write(path, content)?;
584        Ok(())
585    }
586
587    fn run_calculation(&self, input_path: &Path) -> Result<()> {
588        let output_path = input_path.with_extension("out");
589        let output_file = fs::File::create(&output_path)?;
590        let status = Command::new(&self.command)
591            .arg(input_path)
592            .stdout(Stdio::from(output_file))
593            .status()?;
594
595        if !status.success() {
596            return Err(QMError::Calculation("ORCA failed".into()));
597        }
598        Ok(())
599    }
600
601    fn read_output(&self, output_path: &Path, _original_geometry: &Geometry, _state: usize) -> Result<State> {
602        let base = output_path.with_extension("");
603        let engrad_path = base.with_extension("engrad");
604        let log_path = output_path;
605
606        // Check if required files exist before attempting to read
607        if !engrad_path.exists() {
608            return Err(QMError::Parse(format!(
609                "ORCA engrad file not found: {}. Check that ORCA calculation completed successfully and produced gradient output.",
610                engrad_path.display()
611            )));
612        }
613
614        if !log_path.exists() {
615            return Err(QMError::Parse(format!(
616                "ORCA output file not found: {}. Check that ORCA calculation completed successfully.",
617                log_path.display()
618            )));
619        }
620
621        let engrad_content = fs::read_to_string(&engrad_path).map_err(|e| {
622            QMError::Parse(format!(
623                "Failed to read ORCA engrad file {}: {}",
624                engrad_path.display(),
625                e
626            ))
627        })?;
628        let log_content = fs::read_to_string(log_path).map_err(|e| {
629            QMError::Parse(format!(
630                "Failed to read ORCA output file {}: {}",
631                log_path.display(),
632                e
633            ))
634        })?;
635
636        let mut energy = 0.0;
637        let mut forces = Vec::new();
638        let mut geom_coords = Vec::new();
639        let mut elements = Vec::new();
640
641        let mut in_geom = false;
642        let mut in_forces = false;
643
644        for line in engrad_content.lines() {
645            if line.contains("The atomic numbers and current coordinates in Bohr") {
646                in_geom = true;
647            } else if line.starts_with('#') && !geom_coords.is_empty() {
648                in_geom = false;
649            } else if line.contains("The current gradient") {
650                in_forces = true;
651            } else if line.starts_with('#') && !forces.is_empty() {
652                in_forces = false;
653            } else if in_geom
654                && line
655                    .trim()
656                    .chars()
657                    .next()
658                    .is_some_and(|c| c.is_ascii_digit())
659            {
660                let parts: Vec<&str> = line.split_whitespace().collect();
661                if parts.len() >= 4 {
662                    let atomic_num: usize = parts[0].parse().unwrap_or(0);
663                    elements.push(atomic_number_to_symbol(atomic_num));
664                    geom_coords.push(parts[1].parse::<f64>().map_err(|_| {
665                        QMError::Parse(format!(
666                            "Invalid coordinate x in {}: {}",
667                            engrad_path.display(),
668                            line
669                        ))
670                    })?);
671                    geom_coords.push(parts[2].parse::<f64>().map_err(|_| {
672                        QMError::Parse(format!(
673                            "Invalid coordinate y in {}: {}",
674                            engrad_path.display(),
675                            line
676                        ))
677                    })?);
678                    geom_coords.push(parts[3].parse::<f64>().map_err(|_| {
679                        QMError::Parse(format!(
680                            "Invalid coordinate z in {}: {}",
681                            engrad_path.display(),
682                            line
683                        ))
684                    })?);
685                }
686            } else if in_forces
687                && line
688                    .trim()
689                    .chars()
690                    .next()
691                    .is_some_and(|c| c.is_ascii_digit() || c == '-')
692            {
693                let val = line.trim().parse::<f64>().map_err(|e| {
694                    QMError::Parse(format!(
695                        "Failed to parse gradient value '{}' in {}: {}",
696                        line.trim(),
697                        engrad_path.display(),
698                        e
699                    ))
700                })?;
701                forces.push(-val); // Orca engrad stores gradient not force, so we need to negate gradients to have forces 
702            }
703        }
704
705        for line in log_content.lines() {
706            if line.contains("FINAL SINGLE POINT ENERGY") {
707                let parts: Vec<&str> = line.split_whitespace().collect();
708                if parts.len() >= 5 {
709                    energy = parts[4].parse().unwrap_or(0.0);
710                }
711            }
712        }
713
714        // Validate that we successfully parsed required data
715        if forces.is_empty() {
716            return Err(QMError::Parse(format!(
717                "Failed to parse forces from ORCA engrad file: {}. Check that gradient calculation completed successfully.",
718                engrad_path.display()
719            )));
720        }
721
722        if geom_coords.is_empty() {
723            return Err(QMError::Parse(format!(
724                "Failed to parse geometry from ORCA engrad file: {}. Check that the file contains valid atomic coordinates.",
725                engrad_path.display()
726            )));
727        }
728
729        // Validate that we got meaningful energy (prevent zero energy failures)
730        if energy == 0.0 {
731            return Err(QMError::Parse(format!(
732                "Failed to extract energy from ORCA output file: {}. Check that the calculation completed successfully and contains 'FINAL SINGLE POINT ENERGY'.",
733                log_path.display()
734            )));
735        }
736
737        // Validate force/geometry consistency
738        let n_atoms = elements.len();
739        if forces.len() != 3 * n_atoms {
740            return Err(QMError::Parse(format!(
741                "Force/geometry mismatch in ORCA output: expected {} force components for {} atoms, got {}",
742                3 * n_atoms, n_atoms, forces.len()
743            )));
744        }
745
746        // Convert coordinates from Bohr (ORCA .engrad native) to Angstrom (internal storage)
747        let geom_coords_angstrom = bohr_to_angstrom(&DVector::from_vec(geom_coords));
748
749        let state = State {
750            energy,
751            // Convert forces from Ha/Bohr to Ha/Å for consistency with coordinates
752            forces: forces_bohr_to_angstrom(&DVector::from_vec(forces)),
753            geometry: Geometry::new(elements, geom_coords_angstrom.data.as_vec().clone()),  // Coordinates in Angstrom
754        };
755
756        // Validate the state to ensure meaningful data
757        state.validate().map_err(|e| QMError::Parse(format!(
758            "ORCA state validation failed: {}. Check that the calculation completed successfully and produced valid energy/gradient data.",
759            e
760        )))?;
761
762        Ok(state)
763    }
764}
765
766/// BAGEL quantum chemistry program interface.
767///
768/// Provides support for BAGEL calculations including:
769/// - CASSCF and MRCI methods
770/// - JSON-based input files
771/// - Parsing energies and forces from BAGEL output
772///
773/// # Examples
774///
775/// ```
776/// use omecp::qm_interface::BagelInterface;
777///
778/// // Create interface with BAGEL
779/// let bagel = BagelInterface::new(
780///     "bagel".to_string(),
781///     "{\"job\":{\"method\":\"caspt2\"}}".to_string(),
782/// );
783/// ```
784pub struct BagelInterface {
785    /// BAGEL executable command (e.g., "bagel", "/path/to/bagel")
786    pub command: String,
787    /// JSON template for BAGEL input (geometry placeholder will be replaced)
788    pub model_template: String,
789}
790
791/// XTB semi-empirical program interface.
792///
793/// Provides support for XTB calculations using GFN2-xTB method:
794/// - Fast semi-empirical energies and gradients
795/// - XYZ input format
796/// - Parsing from .engrad files
797///
798/// # Examples
799///
800/// ```
801/// use omecp::qm_interface::XtbInterface;
802///
803/// // Create interface with XTB
804/// let xtb = XtbInterface::new("xtb".to_string());
805/// ```
806pub struct XtbInterface {
807    /// XTB executable command (e.g., "xtb", "/path/to/xtb")
808    pub command: String,
809}
810
811impl BagelInterface {
812    /// Creates a new BAGEL interface.
813    ///
814    /// # Arguments
815    ///
816    /// * `command` - BAGEL executable command (e.g., "bagel", "/path/to/bagel")
817    /// * `model_template` - JSON string for BAGEL input, with "geometry" placeholder
818    ///
819    /// # Examples
820    ///
821    /// ```
822    /// use omecp::qm_interface::BagelInterface;
823    ///
824    /// let bagel = BagelInterface::new(
825    ///     "bagel".to_string(),
826    ///     "{\"job\":{\"method\":\"caspt2\"}}".to_string(),
827    /// );
828    /// ```
829    pub fn new(command: String, model_template: String) -> Self {
830        Self {
831            command,
832            model_template,
833        }
834    }
835}
836
837impl QMInterface for BagelInterface {
838    fn write_input(&self, geom: &Geometry, _header: &str, _tail: &str, path: &Path) -> Result<()> {
839        let mut content = String::new();
840
841        // Read model template and replace placeholders
842        for line in self.model_template.lines() {
843            if line.contains("geometry") {
844                content.push_str(&geometry_to_json(geom));
845            } else {
846                content.push_str(line);
847                content.push('\n');
848            }
849        }
850
851        fs::write(path, content)?;
852        Ok(())
853    }
854
855    fn run_calculation(&self, input_path: &Path) -> Result<()> {
856        let output = Command::new(&self.command).arg(input_path).output()?;
857
858        if !output.status.success() {
859            return Err(QMError::Calculation(
860                String::from_utf8_lossy(&output.stderr).to_string(),
861            ));
862        }
863        Ok(())
864    }
865
866    fn read_output(&self, output_path: &Path, _original_geometry: &Geometry, state: usize) -> Result<State> {
867        // Check if output file exists before attempting to read
868        if !output_path.exists() {
869            return Err(QMError::Parse(format!(
870                "BAGEL output file not found: {}. Check that BAGEL calculation completed successfully.",
871                output_path.display()
872            )));
873        }
874
875        let content = fs::read_to_string(output_path).map_err(|e| {
876            QMError::Parse(format!(
877                "Failed to read BAGEL output file {}: {}",
878                output_path.display(),
879                e
880            ))
881        })?;
882
883        let mut energy = 0.0;
884        let mut forces = Vec::new();
885        let mut geom_coords = Vec::new();
886        let mut elements = Vec::new();
887
888        let mut in_geom = false;
889        let mut in_forces = false;
890        let mut in_energy = false;
891
892        for line in content.lines() {
893            if line.contains("*** Geometry ***") {
894                in_geom = true;
895                geom_coords.clear();
896                elements.clear();
897            } else if line.contains("Number of auxiliary basis functions") {
898                in_geom = false;
899            } else if line.contains("Nuclear energy gradient") {
900                in_forces = true;
901            } else if line.contains("* Gradient computed with") {
902                in_forces = false;
903            } else if line.contains("=== FCI iteration ===") {
904                in_energy = true;
905            } else if in_energy
906                && line
907                    .trim()
908                    .chars()
909                    .next()
910                    .is_some_and(|c| c.is_ascii_digit())
911            {
912                let parts: Vec<&str> = line.split_whitespace().collect();
913                if parts.len() >= 4 && parts[1].parse::<usize>().ok() == Some(state) {
914                    energy = parts[parts.len() - 3].parse().unwrap_or(0.0);
915                }
916            } else if line.contains("MS-CASPT2 energy : state") {
917                let state_str = line.split("state").nth(1).unwrap_or("");
918                let parts: Vec<&str> = state_str.split_whitespace().collect();
919                if parts.len() >= 2 && parts[0].parse::<usize>().ok() == Some(state) {
920                    energy = parts[1].parse().unwrap_or(0.0);
921                }
922            } else if in_geom && line.contains("{ \"atom\" :") {
923                // Parse: { "atom" : "C", "xyz" : [ 1.0, 2.0, 3.0 ] }
924                if let Some(atom_part) = line.split("\"atom\"").nth(1) {
925                    if let Some(elem) = atom_part.split('"').nth(2) {
926                        elements.push(elem.to_string());
927                    }
928                }
929                if let Some(xyz_part) = line.split('[').nth(1) {
930                    if let Some(coords_str) = xyz_part.split(']').next() {
931                        for coord in coords_str.split(',') {
932                            if let Ok(val) = coord.trim().parse::<f64>() {
933                                geom_coords.push(val); // Keep in Bohrs
934                            }
935                        }
936                    }
937                }
938            } else if in_forces && line.trim().starts_with(['x', 'y', 'z']) {
939                if let Some(val) = line.split_whitespace().last() {
940                    forces.push(-val.parse::<f64>().unwrap_or(0.0)); // Gradient to force
941                }
942            }
943        }
944
945        // Validate that we successfully parsed required data
946        if forces.is_empty() {
947            return Err(QMError::Parse(format!(
948                "Failed to parse forces from BAGEL output file: {}. Check that gradient calculation completed successfully.",
949                output_path.display()
950            )));
951        }
952
953        if geom_coords.is_empty() {
954            return Err(QMError::Parse(format!(
955                "Failed to parse geometry from BAGEL output file: {}. Check that the file contains valid atomic coordinates.",
956                output_path.display()
957            )));
958        }
959
960        // Validate that we got meaningful energy (prevent zero energy failures)
961        if energy == 0.0 {
962            return Err(QMError::Parse(format!(
963                "Failed to extract energy from BAGEL output file: {}. Check that the calculation completed successfully and contains valid energy data for state {}.",
964                output_path.display(), state
965            )));
966        }
967
968        // Validate force/geometry consistency
969        let n_atoms = elements.len();
970        if forces.len() != 3 * n_atoms {
971            return Err(QMError::Parse(format!(
972                "Force/geometry mismatch in BAGEL output: expected {} force components for {} atoms, got {}",
973                3 * n_atoms, n_atoms, forces.len()
974            )));
975        }
976
977        // Convert coordinates from Bohr (BAGEL native) to Angstrom (internal storage)
978        let geom_coords_angstrom = bohr_to_angstrom(&DVector::from_vec(geom_coords));
979
980        let state = State {
981            energy,
982            // Convert forces from Ha/Bohr to Ha/Å for consistency with coordinates
983            forces: forces_bohr_to_angstrom(&DVector::from_vec(forces)),
984            geometry: Geometry::new(elements, geom_coords_angstrom.data.as_vec().clone()),  // Coordinates in Angstrom
985        };
986
987        // Validate the state to ensure meaningful data
988        state.validate().map_err(|e| QMError::Parse(format!(
989            "BAGEL state validation failed: {}. Check that the calculation completed successfully and produced valid energy/gradient data.",
990            e
991        )))?;
992
993        Ok(state)
994    }
995}
996
997impl XtbInterface {
998    /// Creates a new XTB interface.
999    ///
1000    /// # Arguments
1001    ///
1002    /// * `command` - XTB executable command (e.g., "xtb", "/path/to/xtb")
1003    ///
1004    /// # Examples
1005    ///
1006    /// ```
1007    /// use omecp::qm_interface::XtbInterface;
1008    ///
1009    /// let xtb = XtbInterface::new("xtb".to_string());
1010    /// ```
1011    pub fn new(command: String) -> Self {
1012        Self { command }
1013    }
1014}
1015
1016impl QMInterface for XtbInterface {
1017    fn write_input(&self, geom: &Geometry, _header: &str, _tail: &str, path: &Path) -> Result<()> {
1018        // xTB uses XYZ format
1019        io::write_xyz(geom, path)?;
1020        Ok(())
1021    }
1022
1023    fn run_calculation(&self, input_path: &Path) -> Result<()> {
1024        let output = Command::new(&self.command)
1025            .arg(input_path)
1026            .arg("--engrad")
1027            .output()?;
1028
1029        if !output.status.success() {
1030            return Err(QMError::Calculation(
1031                String::from_utf8_lossy(&output.stderr).to_string(),
1032            ));
1033        }
1034        Ok(())
1035    }
1036
1037    fn read_output(&self, output_path: &Path, _original_geometry: &Geometry, _state: usize) -> Result<State> {
1038        // xTB outputs energy and gradients to .engrad file
1039        let engrad_path = output_path.with_extension("engrad");
1040
1041        // Check if required files exist before attempting to read
1042        if !engrad_path.exists() {
1043            return Err(QMError::Parse(format!(
1044                "XTB engrad file not found: {}. Check that XTB calculation completed successfully and produced gradient output.",
1045                engrad_path.display()
1046            )));
1047        }
1048
1049        let content = fs::read_to_string(&engrad_path).map_err(|e| {
1050            QMError::Parse(format!(
1051                "Failed to read XTB engrad file {}: {}",
1052                engrad_path.display(),
1053                e
1054            ))
1055        })?;
1056
1057        let mut energy = 0.0;
1058        let mut forces = Vec::new();
1059        let mut geom_coords = Vec::new();
1060        let mut elements = Vec::new();
1061
1062        let mut in_energy = false;
1063        let mut in_geom = false;
1064        let mut in_forces = false;
1065
1066        for line in content.lines() {
1067            let line = line.trim();
1068            if line.starts_with("$energy") {
1069                in_energy = true;
1070                continue;
1071            } else if line.starts_with("$gradients") {
1072                in_forces = true;
1073                in_energy = false;
1074                continue;
1075            } else if line.starts_with("$geometry") {
1076                in_geom = true;
1077                in_forces = false;
1078                continue;
1079            } else if line.starts_with("$") {
1080                in_energy = false;
1081                in_geom = false;
1082                in_forces = false;
1083                continue;
1084            }
1085
1086            if in_energy && !line.is_empty() {
1087                energy = line.parse().unwrap_or(0.0);
1088            } else if in_geom && !line.is_empty() {
1089                let parts: Vec<&str> = line.split_whitespace().collect();
1090                if parts.len() >= 4 {
1091                    elements.push(parts[0].to_string());
1092                    geom_coords.push(parts[1].parse().unwrap_or(0.0));
1093                    geom_coords.push(parts[2].parse().unwrap_or(0.0));
1094                    geom_coords.push(parts[3].parse().unwrap_or(0.0));
1095                }
1096            } else if in_forces && !line.is_empty() {
1097                let parts: Vec<&str> = line.split_whitespace().collect();
1098                if parts.len() >= 3 {
1099                    forces.push(parts[0].parse().unwrap_or(0.0));
1100                    forces.push(parts[1].parse().unwrap_or(0.0));
1101                    forces.push(parts[2].parse().unwrap_or(0.0));
1102                }
1103            }
1104        }
1105
1106        // Validate that we successfully parsed required data
1107        if forces.is_empty() {
1108            return Err(QMError::Parse(format!(
1109                "Failed to parse forces from XTB engrad file: {}. Check that gradient calculation completed successfully.",
1110                engrad_path.display()
1111            )));
1112        }
1113
1114        if geom_coords.is_empty() {
1115            return Err(QMError::Parse(format!(
1116                "Failed to parse geometry from XTB engrad file: {}. Check that the file contains valid atomic coordinates.",
1117                engrad_path.display()
1118            )));
1119        }
1120
1121        // Validate that we got meaningful energy (prevent zero energy failures)
1122        if energy == 0.0 {
1123            return Err(QMError::Parse(format!(
1124                "Failed to extract energy from XTB engrad file: {}. Check that the calculation completed successfully and contains valid energy data.",
1125                engrad_path.display()
1126            )));
1127        }
1128
1129        // Validate force/geometry consistency
1130        let n_atoms = elements.len();
1131        if forces.len() != 3 * n_atoms {
1132            return Err(QMError::Parse(format!(
1133                "Force/geometry mismatch in XTB output: expected {} force components for {} atoms, got {}",
1134                3 * n_atoms, n_atoms, forces.len()
1135            )));
1136        }
1137
1138        // Convert coordinates from Bohr (XTB .engrad native) to Angstrom (internal storage)
1139        let geom_coords_angstrom = bohr_to_angstrom(&DVector::from_vec(geom_coords));
1140
1141        let state = State {
1142            energy,
1143            // Convert forces from Ha/Bohr to Ha/Å for consistency with coordinates
1144            forces: forces_bohr_to_angstrom(&DVector::from_vec(forces)),
1145            geometry: Geometry::new(elements, geom_coords_angstrom.data.as_vec().clone()),  // Coordinates in Angstrom
1146        };
1147
1148        // Validate the state to ensure meaningful data
1149        state.validate().map_err(|e| QMError::Parse(format!(
1150            "XTB state validation failed: {}. Check that the calculation completed successfully and produced valid energy/gradient data.",
1151            e
1152        )))?;
1153
1154        Ok(state)
1155    }
1156}
1157
1158fn geometry_to_json(geom: &Geometry) -> String {
1159    let mut result = String::from("\"geometry\" : [\n");
1160
1161    for i in 0..geom.num_atoms {
1162        let coords = geom.get_atom_coords(i);
1163        // Coordinates are already in Angstrom - write directly
1164        // Note: BAGEL expects Bohr coordinates in JSON, so we convert Angstrom to Bohr
1165        let bohr_coords =
1166            crate::geometry::angstrom_to_bohr(&nalgebra::DVector::from_vec(vec![
1167                coords[0], coords[1], coords[2],
1168            ]));
1169        result.push_str(&format!(
1170            "{{ \"atom\" : \"{}\", \"xyz\" : [ {:.8}, {:.8}, {:.8} ] }}",
1171            geom.elements[i], bohr_coords[0], bohr_coords[1], bohr_coords[2]
1172        ));
1173
1174        if i < geom.num_atoms - 1 {
1175            result.push(',');
1176        }
1177        result.push('\n');
1178    }
1179
1180    result.push_str("]\n");
1181    result
1182}
1183
1184/// Configuration for custom QM interfaces
1185#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
1186pub struct CustomInterfaceConfig {
1187    /// Name of the QM program
1188    pub name: String,
1189    /// Command to run the program
1190    pub command: String,
1191    /// Input file template (supports placeholders: {geometry}, {header}, {tail})
1192    pub input_template: String,
1193    /// Output file extension (e.g., "log", "out")
1194    pub output_extension: String,
1195    /// Energy parsing configuration
1196    pub energy_parser: EnergyParser,
1197    /// Forces parsing configuration (optional)
1198    pub forces_parser: Option<ForcesParser>,
1199}
1200
1201/// Configuration for parsing energy from custom QM program output.
1202///
1203/// Specifies the regular expression pattern to locate the energy value
1204/// in the output file and a unit conversion factor to convert it to Hartree.
1205#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
1206pub struct EnergyParser {
1207    /// Regex pattern to find energy (should contain a capture group for the value)
1208    pub pattern: String,
1209    /// Unit conversion factor (multiply by this to get hartree)
1210    pub unit_factor: f64,
1211}
1212
1213/// Configuration for parsing forces from custom QM program output.
1214///
1215/// Specifies the regular expression pattern to locate the force components
1216/// (Fx, Fy, Fz) for each atom in the output file.
1217#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
1218pub struct ForcesParser {
1219    /// Regex pattern to find forces (should capture Fx, Fy, Fz for each atom)
1220    pub pattern: String,
1221}
1222
1223/// Custom QM interface that reads configuration from JSON
1224pub struct CustomInterface {
1225    config: CustomInterfaceConfig,
1226    energy_regex: Regex,
1227    forces_regex: Option<Regex>,
1228}
1229
1230impl CustomInterface {
1231    /// Creates a new `CustomInterface` by loading configuration from a JSON file.
1232    ///
1233    /// This function reads a JSON configuration file that defines how to interact
1234    /// with a custom quantum chemistry program. The configuration includes:
1235    /// - Program command
1236    /// - Input file template with placeholders
1237    /// - Output file extension
1238    /// - Regular expressions for parsing energy and forces
1239    ///
1240    /// # Arguments
1241    ///
1242    /// * `config_path` - Path to the JSON configuration file
1243    ///
1244    /// # Returns
1245    ///
1246    /// Returns a `Result` containing:
1247    /// - `Ok(Self)` - Successfully created `CustomInterface`
1248    /// - `Err(QMError::Parse)` - Failed to read or parse the configuration file
1249    ///
1250    /// # Examples
1251    ///
1252    /// ```no_run
1253    /// use omecp::qm_interface::CustomInterface;
1254    /// use std::path::Path;
1255    ///
1256    /// let custom_interface = CustomInterface::from_file(Path::new("my_qm_config.json"))?;
1257    /// ```
1258    pub fn from_file(config_path: &Path) -> Result<Self> {
1259        let content = fs::read_to_string(config_path).map_err(|e| {
1260            QMError::Parse(format!("Failed to read custom interface config: {}", e))
1261        })?;
1262
1263        let config: CustomInterfaceConfig = serde_json::from_str(&content).map_err(|e| {
1264            QMError::Parse(format!("Failed to parse custom interface config: {}", e))
1265        })?;
1266
1267        // Compile regexes
1268        let energy_regex = Regex::new(&config.energy_parser.pattern)
1269            .map_err(|e| QMError::Parse(format!("Invalid energy regex: {}", e)))?;
1270
1271        let forces_regex = if let Some(ref forces_parser) = config.forces_parser {
1272            Some(
1273                Regex::new(&forces_parser.pattern)
1274                    .map_err(|e| QMError::Parse(format!("Invalid forces regex: {}", e)))?,
1275            )
1276        } else {
1277            None
1278        };
1279
1280        Ok(Self {
1281            config,
1282            energy_regex,
1283            forces_regex,
1284        })
1285    }
1286}
1287
1288impl QMInterface for CustomInterface {
1289    fn write_input(&self, geom: &Geometry, header: &str, tail: &str, path: &Path) -> Result<()> {
1290        // Generate geometry string in XYZ format
1291        let mut geometry_lines = Vec::new();
1292        for i in 0..geom.num_atoms {
1293            let coords = geom.get_atom_coords(i);
1294            // Coordinates are already in Angstrom - write directly
1295            geometry_lines.push(format!(
1296                "{}  {:.8}  {:.8}  {:.8}",
1297                geom.elements[i], coords[0], coords[1], coords[2]
1298            ));
1299        }
1300        let geometry_str = geometry_lines.join("\n");
1301
1302        // Replace placeholders in template
1303        let input_content = self
1304            .config
1305            .input_template
1306            .replace("{geometry}", &geometry_str)
1307            .replace("{header}", header)
1308            .replace("{tail}", tail);
1309
1310        fs::write(path, input_content)?;
1311        Ok(())
1312    }
1313
1314    fn run_calculation(&self, input_path: &Path) -> Result<()> {
1315        let output = Command::new(&self.config.command)
1316            .arg(input_path)
1317            .output()?;
1318
1319        if !output.status.success() {
1320            return Err(QMError::Calculation(
1321                String::from_utf8_lossy(&output.stderr).to_string(),
1322            ));
1323        }
1324        Ok(())
1325    }
1326
1327    fn read_output(&self, output_path: &Path, _original_geometry: &Geometry, _state: usize) -> Result<State> {
1328        // Check if output file exists before attempting to read
1329        if !output_path.exists() {
1330            return Err(QMError::Parse(format!(
1331                "Custom QM program ({}) output file not found: {}. Check that {} calculation completed successfully.",
1332                self.config.name, output_path.display(), self.config.name
1333            )));
1334        }
1335
1336        let content = fs::read_to_string(output_path).map_err(|e| {
1337            QMError::Parse(format!(
1338                "Failed to read {} output file {}: {}",
1339                self.config.name,
1340                output_path.display(),
1341                e
1342            ))
1343        })?;
1344
1345        // Parse energy
1346        let energy = if let Some(caps) = self.energy_regex.captures(&content) {
1347            if let Some(energy_match) = caps.get(1) {
1348                let energy_val: f64 = energy_match.as_str().parse().map_err(|_| {
1349                    QMError::Parse(format!(
1350                        "Failed to parse energy value '{}' from {} output file: {}",
1351                        energy_match.as_str(),
1352                        self.config.name,
1353                        output_path.display()
1354                    ))
1355                })?;
1356                energy_val * self.config.energy_parser.unit_factor
1357            } else {
1358                return Err(QMError::Parse(format!(
1359                    "Energy regex pattern '{}' must have a capture group for {} output file: {}",
1360                    self.config.energy_parser.pattern,
1361                    self.config.name,
1362                    output_path.display()
1363                )));
1364            }
1365        } else {
1366            return Err(QMError::Parse(format!(
1367                "Energy pattern '{}' not found in {} output file: {}. Check that the calculation completed successfully and the pattern matches the output format.",
1368                self.config.energy_parser.pattern, self.config.name, output_path.display()
1369            )));
1370        };
1371
1372        // Parse forces if available
1373        let forces = if let Some(ref forces_regex) = self.forces_regex {
1374            let mut forces_vec = Vec::new();
1375            for caps in forces_regex.captures_iter(&content) {
1376                if caps.len() >= 4 {
1377                    let fx: f64 = caps[1].parse().map_err(|_| {
1378                        QMError::Parse(format!(
1379                            "Failed to parse force component Fx '{}' from {} output file: {}",
1380                            &caps[1],
1381                            self.config.name,
1382                            output_path.display()
1383                        ))
1384                    })?;
1385                    let fy: f64 = caps[2].parse().map_err(|_| {
1386                        QMError::Parse(format!(
1387                            "Failed to parse force component Fy '{}' from {} output file: {}",
1388                            &caps[2],
1389                            self.config.name,
1390                            output_path.display()
1391                        ))
1392                    })?;
1393                    let fz: f64 = caps[3].parse().map_err(|_| {
1394                        QMError::Parse(format!(
1395                            "Failed to parse force component Fz '{}' from {} output file: {}",
1396                            &caps[3],
1397                            self.config.name,
1398                            output_path.display()
1399                        ))
1400                    })?;
1401                    forces_vec.push(fx);
1402                    forces_vec.push(fy);
1403                    forces_vec.push(fz);
1404                }
1405            }
1406            if forces_vec.is_empty() {
1407                return Err(QMError::Parse(format!(
1408                    "No forces found using pattern '{}' in {} output file: {}. Check that gradient calculation completed successfully and the pattern matches the output format.",
1409                    self.forces_regex.as_ref().unwrap().as_str(), self.config.name, output_path.display()
1410                )));
1411            }
1412            DVector::from_vec(forces_vec)
1413        } else {
1414            // No forces available - return zero forces
1415            DVector::zeros(3)
1416        };
1417
1418        // Validate that we got meaningful energy (prevent zero energy failures)
1419        if energy == 0.0 {
1420            return Err(QMError::Parse(format!(
1421                "Extracted zero energy from {} output file: {}. Check that the calculation completed successfully and the energy pattern is correct.",
1422                self.config.name, output_path.display()
1423            )));
1424        }
1425
1426        // Return a simple geometry (could be enhanced)
1427        let geometry = Geometry::new(vec!["H".to_string()], vec![0.0, 0.0, 0.0]);
1428
1429        let state = State {
1430            energy,
1431            // Convert forces from Ha/Bohr to Ha/Å for consistency with coordinates
1432            forces: forces_bohr_to_angstrom(&forces),
1433            geometry,
1434        };
1435
1436        // Validate the state to ensure meaningful data
1437        state.validate().map_err(|e| QMError::Parse(format!(
1438            "Custom interface ({}) state validation failed: {}. Check that the calculation completed successfully and produced valid energy/gradient data.",
1439            self.config.name, e
1440        )))?;
1441
1442        Ok(state)
1443    }
1444}
1445/// Returns the output file extension for the given QM program.
1446///
1447/// This function maps each QM program to its expected output file extension:
1448/// - Gaussian: `.log`
1449/// - ORCA: `.out`
1450/// - XTB: `.out`
1451/// - BAGEL: `.json`
1452/// - Custom: `.log`
1453///
1454/// # Arguments
1455///
1456/// * `program` - The QM program type
1457///
1458/// # Returns
1459///
1460/// The file extension (without the dot) as a string slice
1461///
1462/// # Examples
1463///
1464/// ```
1465/// use omecp::config::QMProgram;
1466/// use omecp::qm_interface::get_output_file_base;
1467///
1468/// assert_eq!(get_output_file_base(QMProgram::Gaussian), "log");
1469/// assert_eq!(get_output_file_base(QMProgram::Orca), "out");
1470/// ```
1471pub fn get_output_file_base(program: crate::config::QMProgram) -> &'static str {
1472    match program {
1473        crate::config::QMProgram::Gaussian => "log",
1474        crate::config::QMProgram::Orca => "out",
1475        crate::config::QMProgram::Xtb => "out",
1476        crate::config::QMProgram::Bagel => "json",
1477        crate::config::QMProgram::Custom => "log",
1478    }
1479}