Skip to main content
← OpenMECP Documentation

omecp/
template_generator.rs

1use std::fs;
2use std::io::{self, Write};
3use std::path::{Path, PathBuf};
4
5/// Template generator for creating MECP input files from geometry files
6///
7/// Generate a template input file from a geometry file
8/// Supports .xyz, .log, and .gjf formats
9pub fn generate_template_from_file<P: AsRef<Path>>(
10    geometry_file: P,
11) -> Result<String, Box<dyn std::error::Error>> {
12    let geometry_file = geometry_file.as_ref();
13
14    if !geometry_file.exists() {
15        return Err(format!("File not found: {}", geometry_file.display()).into());
16    }
17
18    let extension = geometry_file
19        .extension()
20        .and_then(|s| s.to_str())
21        .unwrap_or("");
22
23    let (elements, coords) = match extension.to_lowercase().as_str() {
24        "xyz" => extract_geometry_from_xyz(geometry_file)?,
25        "log" => extract_geometry_from_log(geometry_file)?,
26        "gjf" => extract_geometry_from_gjf(geometry_file)?,
27        _ => return Err(format!("Unsupported file format: {}", extension).into()),
28    };
29
30    let geom_path = geometry_file.canonicalize()?;
31
32    Ok(generate_template(elements, &coords, &geom_path))
33}
34
35/// Extract geometry from XYZ file
36fn extract_geometry_from_xyz(
37    path: &Path,
38) -> Result<(Vec<String>, Vec<f64>), Box<dyn std::error::Error>> {
39    let content = fs::read_to_string(path)?;
40    let lines: Vec<&str> = content.lines().collect();
41
42    if lines.len() < 3 {
43        return Err("Invalid XYZ file: not enough lines".into());
44    }
45
46    let num_atoms = lines[0]
47        .trim()
48        .parse::<usize>()
49        .map_err(|_| "Invalid XYZ file: cannot parse number of atoms")?;
50
51    let mut elements = Vec::new();
52    let mut coords = Vec::new();
53
54    for i in 2..2 + num_atoms {
55        if i >= lines.len() {
56            return Err("Invalid XYZ file: incomplete geometry".into());
57        }
58
59        let parts: Vec<&str> = lines[i].split_whitespace().collect();
60        if parts.len() < 4 {
61            return Err("Invalid XYZ file: malformed coordinate line".into());
62        }
63
64        elements.push(parts[0].to_string());
65        coords.push(parts[1].parse::<f64>()?);
66        coords.push(parts[2].parse::<f64>()?);
67        coords.push(parts[3].parse::<f64>()?);
68    }
69
70    Ok((elements, coords))
71}
72
73/// Extract geometry from Gaussian .log file
74fn extract_geometry_from_log(
75    path: &Path,
76) -> Result<(Vec<String>, Vec<f64>), Box<dyn std::error::Error>> {
77    let content = fs::read_to_string(path)?;
78
79    // Find "Input orientation" section
80    let lines: Vec<&str> = content.lines().collect();
81    let mut in_input_section = false;
82    let mut elements = Vec::new();
83    let mut coords = Vec::new();
84
85    for line in lines {
86        if line.contains("Input orientation") {
87            in_input_section = true;
88            continue;
89        }
90
91        if in_input_section {
92            if line.contains("Distance matrix") || line.contains("Rotational constants") {
93                break;
94            }
95
96            // Parse coordinate lines: atomic_number element x y z
97            let parts: Vec<&str> = line.split_whitespace().collect();
98            if parts.len() >= 5 && parts[0].parse::<usize>().is_ok() {
99                let element = parts[1];
100                let x = parts[3].parse::<f64>()?;
101                let y = parts[4].parse::<f64>()?;
102                let z = parts[5].parse::<f64>()?;
103
104                elements.push(element.to_string());
105                coords.extend_from_slice(&[x, y, z]);
106            }
107        }
108    }
109
110    if elements.is_empty() {
111        return Err("Could not find geometry in log file".into());
112    }
113
114    Ok((elements, coords))
115}
116
117/// Extract geometry from Gaussian .gjf file
118fn extract_geometry_from_gjf(
119    path: &Path,
120) -> Result<(Vec<String>, Vec<f64>), Box<dyn std::error::Error>> {
121    let content = fs::read_to_string(path)?;
122    let lines: Vec<&str> = content.lines().collect();
123
124    let mut elements = Vec::new();
125    let mut coords = Vec::new();
126
127    // Parse geometry section (after header, before empty line and tail)
128    // State machine: 0=header, 1=title, 2=charge_mult, 3=geometry
129    let mut state = 0;
130
131    for line in lines {
132        let trimmed = line.trim();
133
134        match state {
135            // Skip header lines (starting with % or #)
136            0 => {
137                if trimmed.is_empty() {
138                    state = 1;
139                } else if trimmed.starts_with('%') || trimmed.starts_with('#') {
140                    continue;
141                }
142            }
143            // Skip title line
144            1 => {
145                if trimmed.is_empty() {
146                    state = 2;
147                }
148            }
149            // Skip charge/mult line
150            2 => {
151                if !trimmed.is_empty() {
152                    // This is the charge/mult line, advance to geometry state
153                    state = 3;
154                }
155            }
156            // Parse geometry
157            3 => {
158                // Empty line marks end of geometry section
159                if trimmed.is_empty() {
160                    break;
161                }
162
163                // Parse element and coordinates
164                let parts: Vec<&str> = trimmed.split_whitespace().collect();
165                if parts.len() >= 4 {
166                    elements.push(parts[0].to_string());
167                    coords.push(parts[1].parse::<f64>()?);
168                    coords.push(parts[2].parse::<f64>()?);
169                    coords.push(parts[3].parse::<f64>()?);
170                }
171            }
172            _ => {}
173        }
174    }
175
176    if elements.is_empty() {
177        return Err("Could not find geometry in gjf file".into());
178    }
179
180    Ok((elements, coords))
181}
182
183/// Generate the template input file content
184fn generate_template(_elements: Vec<String>, _coords: &Vec<f64>, geometry_path: &Path) -> String {
185    let geom_filename = geometry_path
186        .file_name()
187        .and_then(|s| s.to_str())
188        .unwrap_or("geometry.xyz");
189
190    format!(
191        r#"#===============================================================================
192# OpenMECP Input Template
193#===============================================================================
194# Generated from geometry: {geom_filename}
195#===============================================================================
196
197#===== Basic Settings (required) ===============================================
198nprocs = 30                         # processors for QM program
199mem = 120GB                         # memory (use maxcore value for ORCA)
200method = n b3lyp/6-31g**            # QM method: n method/basis keywords ...
201td_state_a =                        # TD-DFT keywords for state A (Gaussian). Short: td_a
202td_state_b =                        # TD-DFT keywords for state B (Gaussian). Short: td_b
203mp2 = false                         # true for MP2 or double-hybrid in Gaussian
204charge = 1                          # molecular charge (same for both states unless charge_b)
205mult_state_a = 3                    # multiplicity of state A. Short: mult_a
206mult_state_b = 1                    # multiplicity of state B. Short: mult_b
207mode = normal                       # normal | read | noread | stable | inter_read
208
209#===== Convergence Thresholds ==================================================
210delta_e = 0.000050                  # energy difference convergence (hartree)
211rms_dis = 0.0025                    # RMS displacement convergence (angstrom)
212max_dis = 0.004                     # max displacement convergence (angstrom)
213max_grad = 0.001323                 # max gradient convergence (hartree/angstrom)
214rms_grad = 0.000945                 # RMS gradient convergence (hartree/angstrom)
215
216#===== Optimization Control ====================================================
217max_steps = 100                     # maximum optimization steps
218max_step_size = 0.1                 # maximum step size (angstrom)
219max_history = 4                     # DIIS history length
220reduced_factor = 0.5                # step reduction factor near convergence
221step_reduction_multiplier = 10.0    # rms_g multiplier for step reduction threshold
222steepest_descent_step = 0.01        # fallback step when DIIS/BFGS fails (angstrom)
223bfgs_rho = 15                       # BFGS step amplification factor (rho)
224
225#===== Optimizer Selection =====================================================
226switch_step = 3                     # 0=DIIS-only, 3=BFGS→DIIS (default), >=max_steps=BFGS-only
227hessian = direct_psb                # direct_psb (default) | inverse_bfgs | bofill | powell | bfgs_powell_mix
228                                    # Note: blend mode requires a direct Hessian method.
229use_gediis = false                  # options: false/none=GDIIS (default), true/sequential=GEDIIS,
230                                    #           blend=GDIIS_blend with trust region
231use_hybrid_gediis = false           # activated when use_gediis = true or blend
232                                    # options: true/false
233gediis_blend_mode = fixed_sequential # activated when use_gediis = blend AND use_hybrid_gediis = true
234                                     # options: fixed, fixed_sequential (default), gradient, sequential
235smart_history = false               # experimental: remove worst DIIS point instead of oldest
236
237#===== Optimization Control for blend-mode trust radius only====================
238trust_reduction_factor = 0.5        # trust radius contraction on energy increase
239trust_increase_factor = 1.2         # trust radius expansion on energy decrease
240trust_inc_threshold = 0.0001        # energy increase threshold for reduction (hartree)
241trust_dec_threshold = 0.0001        # energy decrease threshold for expansion (hartree)
242trust_min_radius = 0.01             # minimum trust radius (angstrom)
243trust_max_radius = 1.0              # maximum trust radius (angstrom)
244
245#===== Advanced Optimizer Settings =============================================
246# ALL parameters below are commented out (= inactive). Their default values
247# (shown after the = sign) are used unless you uncomment and change them.
248#
249#gediis_switch_rms = 0.005          # Phase 1→2: RMS gradient for GDIIS→GEDIIS switch
250                                    # (active: use_gediis=true + use_hybrid_gediis=true)
251#gediis_switch_step = 0.001         # Phase 2→3: RMS displacement for GEDIIS→GDIIS switch
252                                    # (also used by blend's fixed_sequential mode)
253#use_robust_diis = false            # Activate DIIS step validation (cosine & coefficient checks)
254#gediis_variant = auto              # auto (default) | rfo | energy | simultaneous
255                                    # (active: use_gediis=true + use_robust_diis=true)
256#gdiis_cosine_check = standard      # none | zero | standard (default) | variable | strict
257                                    # (active: use_robust_diis=true)
258#gdiis_coeff_check = regular        # none | regular (default) | strict
259                                    # (active: use_robust_diis=true)
260#n_neg = 0                          # 0 = minimum search, 1 = transition state
261#gediis_sim_switch = 0.0025         # (active: use_robust_diis=true)
262
263#===== Program Settings ========================================================
264program = gaussian                  # gaussian | orca | xtb | bagel
265gau_comm = g16                      # Gaussian command
266orca_comm = orca                    # ORCA command
267xtb_comm = xtb                      # XTB command
268bagel_comm = bagel                  # BAGEL command
269bagel_model = model.inp             # BAGEL model file
270#custom_interface_file = custom_qm.json  # custom QM program config
271
272#===== Advanced Options ========================================================
273#restart = false                    # restart from checkpoint file
274#print_checkpoint = true            # save checkpoint JSON at each step
275#fix_de = 0.0                       # eV: fix energy difference (FixDE mode)
276#state_a = 0                        # state index for BAGEL multireference
277#state_b = 1
278#basis =                            # basis set (for programs separating method/basis)
279#solvent =                          # solvent model
280#dispersion =                       # dispersion correction
281#charge_b =                         # separate charge for state B (default: same as charge)
282#fixedatoms = 1,3,5-7               # comma/hyphen ranges of fixed atoms (1-based)
283
284#===== ONIOM (QM/MM) ===========================================================
285#isoniom = false
286#chargeandmultforoniom1 = 0 1
287#chargeandmultforoniom2 = 0 1
288
289#===== Coordinate Driving ======================================================
290#drive_type = bond                  # bond | angle | dihedral
291#drive_atoms = 1,2                  # atom indices (1-based)
292#drive_start = 1.0
293#drive_end = 2.0
294#drive_steps = 10
295
296#===============================================================================
297# Geometry section: Cartesian coordinates in Angstrom
298#===============================================================================
299*geom
300@{geom_filename}
301*
302
303#===============================================================================
304# Tail sections: extra keywords appended to QM input files
305#===============================================================================
306*tail_a
307# Extra keywords for state A (e.g., SCF convergence, basis sets)
308# For Gaussian: TD(NStates=5,Root=1)
309# For ORCA: %tddft nroots 5 end
310*
311
312*tail_b
313# Extra keywords for state B (e.g., SCF convergence, basis sets)
314*
315
316#===============================================================================
317# Constraints section
318# Syntax: R atom1 atom2 target      (bond constraint, Angstrom)
319#         A a1 a2 a3 target         (angle constraint, degrees)
320#         S R a1 a2 start num step  (1D scan)
321#         S A a1 a2 a3 start num step (angle scan)
322#         S R a1 a2 start1 n1 s1  R a3 a4 start2 n2 s2 (2D scan)
323# Atom indices are 1-based.
324#===============================================================================
325*constr
326#R 1 2 1.0                          # fix bond 1-2 at 1.0 Angstrom
327#A 1 2 3 100.0                      # fix angle 1-2-3 at 100 degrees
328#S R 1 2 1.0 10 0.1                 # scan bond 1-2 from 1.0, 10 steps of 0.1
329*
330
331#===== Output Control ==========================================================
332print_level = 1                     # 0=quiet, 1=normal, 2=verbose (DIIS debug)
333
334"#,
335        geom_filename = geom_filename
336    )
337}
338
339/// Write template to file
340pub fn write_template_to_file<P: AsRef<Path>>(
341    template: &str,
342    output_path: P,
343) -> Result<(), Box<dyn std::error::Error>> {
344    let output_path = output_path.as_ref();
345
346    // Create parent directory if it doesn't exist
347    if let Some(parent) = output_path.parent() {
348        fs::create_dir_all(parent)?;
349    }
350
351    fs::write(output_path, template)?;
352    Ok(())
353}
354
355/// Get default output filename based on input geometry file
356///
357/// This function generates a default `.inp` filename from the source geometry file.
358/// To prevent overwriting QM output files when creating templates from them,
359/// it adds a `_omecp` suffix when the source file has an extension that matches
360/// common QM output formats (`.log`, `.out`, `.json`).
361///
362/// # Examples
363///
364/// - `abc.xyz` → `abc.inp` (no conflict)
365/// - `abc.log` → `abc_omecp.inp` (prevents overwriting `abc.log` when run)
366/// - `calc.out` → `calc_omecp.inp` (prevents overwriting `calc.out` when run)
367/// - `result.json` → `result_omecp.inp` (prevents overwriting `result.json` when run)
368pub fn get_default_output_path<P: AsRef<Path>>(geometry_file: P) -> PathBuf {
369    let geometry_file = geometry_file.as_ref();
370    let stem = geometry_file
371        .file_stem()
372        .and_then(|s| s.to_str())
373        .unwrap_or("template");
374
375    // Check if the source file has an extension that matches QM output formats
376    let source_ext = geometry_file
377        .extension()
378        .and_then(|s| s.to_str())
379        .map(|s| s.to_lowercase());
380
381    let is_qm_output = match source_ext.as_deref() {
382        Some("log") | Some("out") | Some("json") => true,
383        _ => false,
384    };
385
386    // If source is a QM output file, add suffix to prevent overwriting
387    let output_stem = if is_qm_output {
388        format!("{}_omecp", stem)
389    } else {
390        stem.to_string()
391    };
392
393    PathBuf::from(format!("{}.inp", output_stem))
394}
395
396/// Interactive prompt for user input
397pub fn prompt_user(prompt: &str) -> Result<String, Box<dyn std::error::Error>> {
398    print!("{} ", prompt);
399    io::stdout().flush()?;
400
401    let mut input = String::new();
402    io::stdin().read_line(&mut input)?;
403    Ok(input.trim().to_string())
404}
405
406/// Validate file extension
407pub fn is_supported_format(path: &Path) -> bool {
408    match path.extension().and_then(|s| s.to_str()) {
409        Some(ext) => matches!(ext.to_lowercase().as_str(), "xyz" | "log" | "gjf"),
410        None => false,
411    }
412}