omecp/
template_generator.rs1use std::fs;
2use std::io::{self, Write};
3use std::path::{Path, PathBuf};
4
5pub fn generate_template_from_file<P: AsRef<Path>>(
10 geometry_file: P,
11) -> Result<String, Box<dyn std::error::Error>> {
12 let geometry_file = geometry_file.as_ref();
13
14 if !geometry_file.exists() {
15 return Err(format!("File not found: {}", geometry_file.display()).into());
16 }
17
18 let extension = geometry_file
19 .extension()
20 .and_then(|s| s.to_str())
21 .unwrap_or("");
22
23 let (elements, coords) = match extension.to_lowercase().as_str() {
24 "xyz" => extract_geometry_from_xyz(geometry_file)?,
25 "log" => extract_geometry_from_log(geometry_file)?,
26 "gjf" => extract_geometry_from_gjf(geometry_file)?,
27 _ => return Err(format!("Unsupported file format: {}", extension).into()),
28 };
29
30 let geom_path = geometry_file.canonicalize()?;
31
32 Ok(generate_template(elements, &coords, &geom_path))
33}
34
35fn extract_geometry_from_xyz(
37 path: &Path,
38) -> Result<(Vec<String>, Vec<f64>), Box<dyn std::error::Error>> {
39 let content = fs::read_to_string(path)?;
40 let lines: Vec<&str> = content.lines().collect();
41
42 if lines.len() < 3 {
43 return Err("Invalid XYZ file: not enough lines".into());
44 }
45
46 let num_atoms = lines[0]
47 .trim()
48 .parse::<usize>()
49 .map_err(|_| "Invalid XYZ file: cannot parse number of atoms")?;
50
51 let mut elements = Vec::new();
52 let mut coords = Vec::new();
53
54 for i in 2..2 + num_atoms {
55 if i >= lines.len() {
56 return Err("Invalid XYZ file: incomplete geometry".into());
57 }
58
59 let parts: Vec<&str> = lines[i].split_whitespace().collect();
60 if parts.len() < 4 {
61 return Err("Invalid XYZ file: malformed coordinate line".into());
62 }
63
64 elements.push(parts[0].to_string());
65 coords.push(parts[1].parse::<f64>()?);
66 coords.push(parts[2].parse::<f64>()?);
67 coords.push(parts[3].parse::<f64>()?);
68 }
69
70 Ok((elements, coords))
71}
72
73fn extract_geometry_from_log(
75 path: &Path,
76) -> Result<(Vec<String>, Vec<f64>), Box<dyn std::error::Error>> {
77 let content = fs::read_to_string(path)?;
78
79 let lines: Vec<&str> = content.lines().collect();
81 let mut in_input_section = false;
82 let mut elements = Vec::new();
83 let mut coords = Vec::new();
84
85 for line in lines {
86 if line.contains("Input orientation") {
87 in_input_section = true;
88 continue;
89 }
90
91 if in_input_section {
92 if line.contains("Distance matrix") || line.contains("Rotational constants") {
93 break;
94 }
95
96 let parts: Vec<&str> = line.split_whitespace().collect();
98 if parts.len() >= 5 && parts[0].parse::<usize>().is_ok() {
99 let element = parts[1];
100 let x = parts[3].parse::<f64>()?;
101 let y = parts[4].parse::<f64>()?;
102 let z = parts[5].parse::<f64>()?;
103
104 elements.push(element.to_string());
105 coords.extend_from_slice(&[x, y, z]);
106 }
107 }
108 }
109
110 if elements.is_empty() {
111 return Err("Could not find geometry in log file".into());
112 }
113
114 Ok((elements, coords))
115}
116
117fn extract_geometry_from_gjf(
119 path: &Path,
120) -> Result<(Vec<String>, Vec<f64>), Box<dyn std::error::Error>> {
121 let content = fs::read_to_string(path)?;
122 let lines: Vec<&str> = content.lines().collect();
123
124 let mut elements = Vec::new();
125 let mut coords = Vec::new();
126
127 let mut state = 0;
130
131 for line in lines {
132 let trimmed = line.trim();
133
134 match state {
135 0 => {
137 if trimmed.is_empty() {
138 state = 1;
139 } else if trimmed.starts_with('%') || trimmed.starts_with('#') {
140 continue;
141 }
142 }
143 1 => {
145 if trimmed.is_empty() {
146 state = 2;
147 }
148 }
149 2 => {
151 if !trimmed.is_empty() {
152 state = 3;
154 }
155 }
156 3 => {
158 if trimmed.is_empty() {
160 break;
161 }
162
163 let parts: Vec<&str> = trimmed.split_whitespace().collect();
165 if parts.len() >= 4 {
166 elements.push(parts[0].to_string());
167 coords.push(parts[1].parse::<f64>()?);
168 coords.push(parts[2].parse::<f64>()?);
169 coords.push(parts[3].parse::<f64>()?);
170 }
171 }
172 _ => {}
173 }
174 }
175
176 if elements.is_empty() {
177 return Err("Could not find geometry in gjf file".into());
178 }
179
180 Ok((elements, coords))
181}
182
183fn generate_template(_elements: Vec<String>, _coords: &Vec<f64>, geometry_path: &Path) -> String {
185 let geom_filename = geometry_path
186 .file_name()
187 .and_then(|s| s.to_str())
188 .unwrap_or("geometry.xyz");
189
190 format!(
191 r#"#===============================================================================
192# OpenMECP Input Template
193#===============================================================================
194# Generated from geometry: {geom_filename}
195#===============================================================================
196
197#===== Basic Settings (required) ===============================================
198nprocs = 30 # processors for QM program
199mem = 120GB # memory (use maxcore value for ORCA)
200method = n b3lyp/6-31g** # QM method: n method/basis keywords ...
201td_state_a = # TD-DFT keywords for state A (Gaussian). Short: td_a
202td_state_b = # TD-DFT keywords for state B (Gaussian). Short: td_b
203mp2 = false # true for MP2 or double-hybrid in Gaussian
204charge = 1 # molecular charge (same for both states unless charge_b)
205mult_state_a = 3 # multiplicity of state A. Short: mult_a
206mult_state_b = 1 # multiplicity of state B. Short: mult_b
207mode = normal # normal | read | noread | stable | inter_read
208
209#===== Convergence Thresholds ==================================================
210delta_e = 0.000050 # energy difference convergence (hartree)
211rms_dis = 0.0025 # RMS displacement convergence (angstrom)
212max_dis = 0.004 # max displacement convergence (angstrom)
213max_grad = 0.001323 # max gradient convergence (hartree/angstrom)
214rms_grad = 0.000945 # RMS gradient convergence (hartree/angstrom)
215
216#===== Optimization Control ====================================================
217max_steps = 100 # maximum optimization steps
218max_step_size = 0.1 # maximum step size (angstrom)
219max_history = 4 # DIIS history length
220reduced_factor = 0.5 # step reduction factor near convergence
221step_reduction_multiplier = 10.0 # rms_g multiplier for step reduction threshold
222steepest_descent_step = 0.01 # fallback step when DIIS/BFGS fails (angstrom)
223bfgs_rho = 15 # BFGS step amplification factor (rho)
224
225#===== Optimizer Selection =====================================================
226switch_step = 3 # 0=DIIS-only, 3=BFGS→DIIS (default), >=max_steps=BFGS-only
227hessian = direct_psb # direct_psb (default) | inverse_bfgs | bofill | powell | bfgs_powell_mix
228 # Note: blend mode requires a direct Hessian method.
229use_gediis = false # options: false/none=GDIIS (default), true/sequential=GEDIIS,
230 # blend=GDIIS_blend with trust region
231use_hybrid_gediis = false # activated when use_gediis = true or blend
232 # options: true/false
233gediis_blend_mode = fixed_sequential # activated when use_gediis = blend AND use_hybrid_gediis = true
234 # options: fixed, fixed_sequential (default), gradient, sequential
235smart_history = false # experimental: remove worst DIIS point instead of oldest
236
237#===== Optimization Control for blend-mode trust radius only====================
238trust_reduction_factor = 0.5 # trust radius contraction on energy increase
239trust_increase_factor = 1.2 # trust radius expansion on energy decrease
240trust_inc_threshold = 0.0001 # energy increase threshold for reduction (hartree)
241trust_dec_threshold = 0.0001 # energy decrease threshold for expansion (hartree)
242trust_min_radius = 0.01 # minimum trust radius (angstrom)
243trust_max_radius = 1.0 # maximum trust radius (angstrom)
244
245#===== Advanced Optimizer Settings =============================================
246# ALL parameters below are commented out (= inactive). Their default values
247# (shown after the = sign) are used unless you uncomment and change them.
248#
249#gediis_switch_rms = 0.005 # Phase 1→2: RMS gradient for GDIIS→GEDIIS switch
250 # (active: use_gediis=true + use_hybrid_gediis=true)
251#gediis_switch_step = 0.001 # Phase 2→3: RMS displacement for GEDIIS→GDIIS switch
252 # (also used by blend's fixed_sequential mode)
253#use_robust_diis = false # Activate DIIS step validation (cosine & coefficient checks)
254#gediis_variant = auto # auto (default) | rfo | energy | simultaneous
255 # (active: use_gediis=true + use_robust_diis=true)
256#gdiis_cosine_check = standard # none | zero | standard (default) | variable | strict
257 # (active: use_robust_diis=true)
258#gdiis_coeff_check = regular # none | regular (default) | strict
259 # (active: use_robust_diis=true)
260#n_neg = 0 # 0 = minimum search, 1 = transition state
261#gediis_sim_switch = 0.0025 # (active: use_robust_diis=true)
262
263#===== Program Settings ========================================================
264program = gaussian # gaussian | orca | xtb | bagel
265gau_comm = g16 # Gaussian command
266orca_comm = orca # ORCA command
267xtb_comm = xtb # XTB command
268bagel_comm = bagel # BAGEL command
269bagel_model = model.inp # BAGEL model file
270#custom_interface_file = custom_qm.json # custom QM program config
271
272#===== Advanced Options ========================================================
273#restart = false # restart from checkpoint file
274#print_checkpoint = true # save checkpoint JSON at each step
275#fix_de = 0.0 # eV: fix energy difference (FixDE mode)
276#state_a = 0 # state index for BAGEL multireference
277#state_b = 1
278#basis = # basis set (for programs separating method/basis)
279#solvent = # solvent model
280#dispersion = # dispersion correction
281#charge_b = # separate charge for state B (default: same as charge)
282#fixedatoms = 1,3,5-7 # comma/hyphen ranges of fixed atoms (1-based)
283
284#===== ONIOM (QM/MM) ===========================================================
285#isoniom = false
286#chargeandmultforoniom1 = 0 1
287#chargeandmultforoniom2 = 0 1
288
289#===== Coordinate Driving ======================================================
290#drive_type = bond # bond | angle | dihedral
291#drive_atoms = 1,2 # atom indices (1-based)
292#drive_start = 1.0
293#drive_end = 2.0
294#drive_steps = 10
295
296#===============================================================================
297# Geometry section: Cartesian coordinates in Angstrom
298#===============================================================================
299*geom
300@{geom_filename}
301*
302
303#===============================================================================
304# Tail sections: extra keywords appended to QM input files
305#===============================================================================
306*tail_a
307# Extra keywords for state A (e.g., SCF convergence, basis sets)
308# For Gaussian: TD(NStates=5,Root=1)
309# For ORCA: %tddft nroots 5 end
310*
311
312*tail_b
313# Extra keywords for state B (e.g., SCF convergence, basis sets)
314*
315
316#===============================================================================
317# Constraints section
318# Syntax: R atom1 atom2 target (bond constraint, Angstrom)
319# A a1 a2 a3 target (angle constraint, degrees)
320# S R a1 a2 start num step (1D scan)
321# S A a1 a2 a3 start num step (angle scan)
322# S R a1 a2 start1 n1 s1 R a3 a4 start2 n2 s2 (2D scan)
323# Atom indices are 1-based.
324#===============================================================================
325*constr
326#R 1 2 1.0 # fix bond 1-2 at 1.0 Angstrom
327#A 1 2 3 100.0 # fix angle 1-2-3 at 100 degrees
328#S R 1 2 1.0 10 0.1 # scan bond 1-2 from 1.0, 10 steps of 0.1
329*
330
331#===== Output Control ==========================================================
332print_level = 1 # 0=quiet, 1=normal, 2=verbose (DIIS debug)
333
334"#,
335 geom_filename = geom_filename
336 )
337}
338
339pub fn write_template_to_file<P: AsRef<Path>>(
341 template: &str,
342 output_path: P,
343) -> Result<(), Box<dyn std::error::Error>> {
344 let output_path = output_path.as_ref();
345
346 if let Some(parent) = output_path.parent() {
348 fs::create_dir_all(parent)?;
349 }
350
351 fs::write(output_path, template)?;
352 Ok(())
353}
354
355pub fn get_default_output_path<P: AsRef<Path>>(geometry_file: P) -> PathBuf {
369 let geometry_file = geometry_file.as_ref();
370 let stem = geometry_file
371 .file_stem()
372 .and_then(|s| s.to_str())
373 .unwrap_or("template");
374
375 let source_ext = geometry_file
377 .extension()
378 .and_then(|s| s.to_str())
379 .map(|s| s.to_lowercase());
380
381 let is_qm_output = match source_ext.as_deref() {
382 Some("log") | Some("out") | Some("json") => true,
383 _ => false,
384 };
385
386 let output_stem = if is_qm_output {
388 format!("{}_omecp", stem)
389 } else {
390 stem.to_string()
391 };
392
393 PathBuf::from(format!("{}.inp", output_stem))
394}
395
396pub fn prompt_user(prompt: &str) -> Result<String, Box<dyn std::error::Error>> {
398 print!("{} ", prompt);
399 io::stdout().flush()?;
400
401 let mut input = String::new();
402 io::stdin().read_line(&mut input)?;
403 Ok(input.trim().to_string())
404}
405
406pub fn is_supported_format(path: &Path) -> bool {
408 match path.extension().and_then(|s| s.to_str()) {
409 Some(ext) => matches!(ext.to_lowercase().as_str(), "xyz" | "log" | "gjf"),
410 None => false,
411 }
412}