1use crate::config::{Config, HessianMethod, QMProgram, RunMode, ScanSpec, ScanType};
76use crate::constraints::Constraint;
77use crate::geometry::Geometry;
78use regex::Regex;
79use std::fs;
80use std::path::Path;
81use thiserror::Error;
82
83#[derive(Error, Debug)]
92pub enum ParseError {
93 #[error("IO error: {0}")]
95 Io(#[from] std::io::Error),
96 #[error("Parse error: {0}")]
98 Parse(String),
99 #[error("Invalid tail section '{section}'{}: {message}", line_number.map(|n| format!(" at line {}", n)).unwrap_or_default())]
101 InvalidTailSection {
102 section: String,
104 message: String,
106 line_number: Option<usize>,
108 },
109 #[error("Gaussian syntax error in {section}{}: {details}", line_number.map(|n| format!(" at line {}", n)).unwrap_or_default())]
111 GaussianSyntaxError {
112 section: String,
114 details: String,
116 line_number: Option<usize>,
118 },
119}
120
121type Result<T> = std::result::Result<T, ParseError>;
123
124pub struct InputData {
146 pub config: Config,
148 pub geometry: Geometry,
150 pub constraints: Vec<Constraint>,
152 pub tail_a: String,
154 pub tail_b: String,
156 pub fixed_atoms: Vec<usize>,
158 pub lst_a: Option<Geometry>,
160 pub lst_b: Option<Geometry>,
162}
163
164pub fn parse_input(path: &Path) -> Result<InputData> {
222 let content: String = fs::read_to_string(path)?;
223 let mut config: Config = Config::default();
224 let mut elements: Vec<String> = Vec::new();
225 let mut coords: Vec<f64> = Vec::new();
226 let mut constraints: Vec<Constraint> = Vec::new();
227 let mut tail_a: String = String::new();
228 let mut tail_b: String = String::new();
229 let mut fixed_atoms: Vec<usize> = Vec::new();
230 let mut lst_a_elements: Vec<String> = Vec::new();
231 let mut lst_a_coords: Vec<f64> = Vec::new();
232 let mut lst_b_elements: Vec<String> = Vec::new();
233 let mut lst_b_coords: Vec<f64> = Vec::new();
234 let mut oniom_layer_info: Vec<String> = Vec::new();
235
236 let mut in_geom: bool = false;
237 let mut in_tail_a: bool = false;
238 let mut in_tail_b: bool = false;
239 let mut in_constr: bool = false;
240 let mut in_lst_a: bool = false;
241 let mut in_lst_b: bool = false;
242
243 let geom_re: Regex = Regex::new(r"^\s*(\S+)\s+(-?\d+\.?\d*)").unwrap();
244
245 for line in content.lines() {
246 let line_lower: String = line.to_lowercase();
247 let trimmed: &str = line_lower.trim();
248
249 if trimmed.starts_with('#') {
250 continue;
251 }
252
253 if trimmed.contains("*geom") {
254 in_geom = true;
255 continue;
256 } else if trimmed.contains("*tail_a") {
257 in_tail_a = true;
258 continue;
259 } else if trimmed.contains("*tail_b") {
260 in_tail_b = true;
261 continue;
262 } else if trimmed.contains("*constr") {
263 in_constr = true;
264 continue;
265 } else if trimmed.contains("*lst_a") {
266 in_lst_a = true;
267 continue;
268 } else if trimmed.contains("*lst_b") {
269 in_lst_b = true;
270 continue;
271 } else if trimmed == "*" {
272 in_geom = false;
273 in_tail_a = false;
274 in_tail_b = false;
275 in_constr = false;
276 in_lst_a = false;
277 in_lst_b = false;
278 continue;
279 }
280
281 if in_geom {
282 if line.trim().starts_with('@') {
283 let filename: &str = line.trim().strip_prefix('@').unwrap().trim();
285 let external_path: &Path = Path::new(filename);
286 let (ext_elements, ext_coords) = read_external_geometry(external_path)?;
287 elements = ext_elements;
288 coords = ext_coords;
289 } else if geom_re.is_match(line) {
290 let parts: Vec<&str> = line.split_whitespace().collect();
291 if parts.len() >= 4 {
292 elements.push(parts[0].to_string());
293 coords.push(
294 parts[1]
295 .parse()
296 .map_err(|_| ParseError::Parse("Invalid coordinate".into()))?,
297 );
298 coords.push(
299 parts[2]
300 .parse()
301 .map_err(|_| ParseError::Parse("Invalid coordinate".into()))?,
302 );
303 coords.push(
304 parts[3]
305 .parse()
306 .map_err(|_| ParseError::Parse("Invalid coordinate".into()))?,
307 );
308 if parts.len() > 4 {
309 oniom_layer_info.push(parts[4..].join(" "));
310 } else {
311 oniom_layer_info.push(String::new());
312 }
313 }
314 }
315 } else if in_lst_a && geom_re.is_match(line) {
316 let parts: Vec<&str> = line.split_whitespace().collect();
317 if parts.len() >= 4 {
318 lst_a_elements.push(parts[0].to_string());
319 lst_a_coords.push(
320 parts[1]
321 .parse()
322 .map_err(|_| ParseError::Parse("Invalid coordinate".into()))?,
323 );
324 lst_a_coords.push(
325 parts[2]
326 .parse()
327 .map_err(|_| ParseError::Parse("Invalid coordinate".into()))?,
328 );
329 lst_a_coords.push(
330 parts[3]
331 .parse()
332 .map_err(|_| ParseError::Parse("Invalid coordinate".into()))?,
333 );
334 }
335 } else if in_lst_b && geom_re.is_match(line) {
336 let parts: Vec<&str> = line.split_whitespace().collect();
337 if parts.len() >= 4 {
338 lst_b_elements.push(parts[0].to_string());
339 lst_b_coords.push(
340 parts[1]
341 .parse()
342 .map_err(|_| ParseError::Parse("Invalid coordinate".into()))?,
343 );
344 lst_b_coords.push(
345 parts[2]
346 .parse()
347 .map_err(|_| ParseError::Parse("Invalid coordinate".into()))?,
348 );
349 lst_b_coords.push(
350 parts[3]
351 .parse()
352 .map_err(|_| ParseError::Parse("Invalid coordinate".into()))?,
353 );
354 }
355 } else if in_tail_a {
356 if !is_comment_line(line) && !line.trim().is_empty() {
357 if !tail_a.is_empty() {
358 tail_a.push(' '); }
360 tail_a.push_str(line.trim());
361 }
362 } else if in_tail_b {
363 if !is_comment_line(line) && !line.trim().is_empty() {
364 if !tail_b.is_empty() {
365 tail_b.push(' '); }
367 tail_b.push_str(line.trim());
368 }
369 } else if in_constr && !trimmed.is_empty() {
370 if trimmed.starts_with('s') {
371 parse_scan(line, &mut config)?;
372 } else {
373 parse_constraint(line, &mut constraints)?;
374 }
375 } else if trimmed.contains('=') {
376 parse_parameter(line, &mut config, &mut fixed_atoms)?;
377 }
378 }
379
380 config.oniom_layer_info = oniom_layer_info;
381
382 validate_tail_section_with_filtering_context(&tail_a, "TAIL_a")?;
385 validate_tail_section_with_filtering_context(&tail_b, "TAIL_b")?;
386
387 let geometry = Geometry::new(elements, coords);
389
390 let lst_a = if !lst_a_elements.is_empty() {
391 Some(Geometry::new(lst_a_elements, lst_a_coords))
393 } else {
394 None
395 };
396 let lst_b = if !lst_b_elements.is_empty() {
397 Some(Geometry::new(lst_b_elements, lst_b_coords))
399 } else {
400 None
401 };
402
403 validate_constraints_against_geometry(&constraints, &geometry)?;
405
406 Ok(InputData {
407 config,
408 geometry,
409 constraints,
410 tail_a,
411 tail_b,
412 fixed_atoms,
413 lst_a,
414 lst_b,
415 })
416}
417
418fn parse_constraint(line: &str, constraints: &mut Vec<Constraint>) -> Result<()> {
447 let trimmed = line.trim();
448 if trimmed.is_empty() || trimmed.starts_with('#') {
449 return Ok(());
450 }
451
452 let parts: Vec<&str> = trimmed.split_whitespace().collect();
453 if parts.is_empty() {
454 return Ok(());
455 }
456
457 let constraint_type = parts[0].to_lowercase();
458
459 match constraint_type.as_str() {
460 "r" | "bond" => {
461 if parts.len() < 4 {
462 return Err(ParseError::Parse(format!(
463 "Bond constraint requires 4 parameters: 'r atom1 atom2 target_distance'. Got: '{}'",
464 line
465 )));
466 }
467
468 let a = parse_atom_index(parts[1], "first atom", line)?;
469 let b = parse_atom_index(parts[2], "second atom", line)?;
470 let target = parse_distance_target(parts[3], line)?;
471
472 if a == b {
473 return Err(ParseError::Parse(format!(
474 "Bond constraint cannot have the same atom twice: atom {} in line '{}'",
475 a + 1,
476 line
477 )));
478 }
479
480 constraints.push(Constraint::Bond {
481 atoms: (a, b),
482 target,
483 });
484 }
485 "a" | "angle" => {
486 if parts.len() < 5 {
487 return Err(ParseError::Parse(format!(
488 "Angle constraint requires 5 parameters: 'a atom1 atom2 atom3 target_angle'. Got: '{}'",
489 line
490 )));
491 }
492
493 let a = parse_atom_index(parts[1], "first atom", line)?;
494 let b = parse_atom_index(parts[2], "second atom", line)?;
495 let c = parse_atom_index(parts[3], "third atom", line)?;
496 let target_degrees = parse_angle_target(parts[4], line)?;
497 let target = target_degrees.to_radians();
498
499 if a == b || b == c || a == c {
501 return Err(ParseError::Parse(format!(
502 "Angle constraint atoms must be unique. Got atoms {}, {}, {} in line '{}'",
503 a + 1,
504 b + 1,
505 c + 1,
506 line
507 )));
508 }
509
510 constraints.push(Constraint::Angle {
511 atoms: (a, b, c),
512 target,
513 });
514 }
515 "d" | "dihedral" => {
516 if parts.len() < 6 {
517 return Err(ParseError::Parse(format!(
518 "Dihedral constraint requires 6 parameters: 'd atom1 atom2 atom3 atom4 target_dihedral'. Got: '{}'",
519 line
520 )));
521 }
522
523 let a = parse_atom_index(parts[1], "first atom", line)?;
524 let b = parse_atom_index(parts[2], "second atom", line)?;
525 let c = parse_atom_index(parts[3], "third atom", line)?;
526 let d = parse_atom_index(parts[4], "fourth atom", line)?;
527 let target_degrees = parse_angle_target(parts[5], line)?;
528 let target = target_degrees.to_radians();
529
530 let atoms = [a, b, c, d];
532 for i in 0..4 {
533 for j in (i + 1)..4 {
534 if atoms[i] == atoms[j] {
535 return Err(ParseError::Parse(format!(
536 "Dihedral constraint atoms must be unique. Got atoms {}, {}, {}, {} in line '{}'",
537 a + 1, b + 1, c + 1, d + 1, line
538 )));
539 }
540 }
541 }
542
543 constraints.push(Constraint::Dihedral {
544 atoms: (a, b, c, d),
545 target,
546 });
547 }
548 _ => {
549 return Err(ParseError::Parse(format!(
550 "Unknown constraint type '{}'. Supported types: 'r' (bond), 'a' (angle), 'd' (dihedral). Line: '{}'",
551 constraint_type, line
552 )));
553 }
554 }
555 Ok(())
556}
557
558fn parse_atom_index(s: &str, atom_description: &str, full_line: &str) -> Result<usize> {
560 let index = s.parse::<usize>()
561 .map_err(|_| ParseError::Parse(format!(
562 "Invalid {} index '{}' in constraint line '{}'. Atom indices must be positive integers.",
563 atom_description, s, full_line
564 )))?;
565
566 if index == 0 {
567 return Err(ParseError::Parse(format!(
568 "Atom indices must be 1-based (starting from 1), got {} for {} in line '{}'",
569 index, atom_description, full_line
570 )));
571 }
572
573 Ok(index - 1) }
575
576fn parse_distance_target(s: &str, full_line: &str) -> Result<f64> {
578 let distance = s.parse::<f64>()
579 .map_err(|_| ParseError::Parse(format!(
580 "Invalid distance target '{}' in constraint line '{}'. Distance must be a positive number.",
581 s, full_line
582 )))?;
583
584 if distance <= 0.0 {
585 return Err(ParseError::Parse(format!(
586 "Distance target must be positive, got {} in line '{}'",
587 distance, full_line
588 )));
589 }
590
591 if distance > 20.0 {
592 return Err(ParseError::Parse(format!(
593 "Distance target {} Angstrom seems unreasonably large in line '{}'. Maximum allowed: 20.0 Angstrom",
594 distance, full_line
595 )));
596 }
597
598 Ok(distance)
599}
600
601fn parse_angle_target(s: &str, full_line: &str) -> Result<f64> {
603 let angle = s.parse::<f64>().map_err(|_| {
604 ParseError::Parse(format!(
605 "Invalid angle target '{}' in constraint line '{}'. Angle must be a number in degrees.",
606 s, full_line
607 ))
608 })?;
609
610 if !(0.0..=360.0).contains(&angle) {
611 return Err(ParseError::Parse(format!(
612 "Angle target {} degrees is outside valid range [0, 360] in line '{}'",
613 angle, full_line
614 )));
615 }
616
617 Ok(angle)
618}
619
620fn validate_constraints_against_geometry(
637 constraints: &[Constraint],
638 geometry: &Geometry,
639) -> Result<()> {
640 let num_atoms = geometry.num_atoms;
641
642 for (i, constraint) in constraints.iter().enumerate() {
644 match constraint {
645 Constraint::Bond {
646 atoms: (a, b),
647 target,
648 } => {
649 if *a >= num_atoms {
651 return Err(ParseError::Parse(format!(
652 "Bond constraint {}: atom index {} exceeds number of atoms ({})",
653 i + 1,
654 a + 1,
655 num_atoms
656 )));
657 }
658 if *b >= num_atoms {
659 return Err(ParseError::Parse(format!(
660 "Bond constraint {}: atom index {} exceeds number of atoms ({})",
661 i + 1,
662 b + 1,
663 num_atoms
664 )));
665 }
666
667 if *target > 10.0 {
669 return Err(ParseError::Parse(format!(
670 "Bond constraint {}: target distance {:.3} Angstrom is unreasonably large",
671 i + 1,
672 target
673 )));
674 }
675 }
676 Constraint::Angle {
677 atoms: (a, b, c),
678 target: _,
679 } => {
680 for (atom_idx, atom_name) in [(*a, "first"), (*b, "second"), (*c, "third")] {
682 if atom_idx >= num_atoms {
683 return Err(ParseError::Parse(format!(
684 "Angle constraint {}: {} atom index {} exceeds number of atoms ({})",
685 i + 1,
686 atom_name,
687 atom_idx + 1,
688 num_atoms
689 )));
690 }
691 }
692 }
693 Constraint::Dihedral {
694 atoms: (a, b, c, d),
695 target: _,
696 } => {
697 for (atom_idx, atom_name) in
699 [(*a, "first"), (*b, "second"), (*c, "third"), (*d, "fourth")]
700 {
701 if atom_idx >= num_atoms {
702 return Err(ParseError::Parse(format!(
703 "Dihedral constraint {}: {} atom index {} exceeds number of atoms ({})",
704 i + 1,
705 atom_name,
706 atom_idx + 1,
707 num_atoms
708 )));
709 }
710 }
711 }
712 }
713 }
714
715 for i in 0..constraints.len() {
717 for j in (i + 1)..constraints.len() {
718 if constraints_are_equivalent(&constraints[i], &constraints[j]) {
719 return Err(ParseError::Parse(format!(
720 "Duplicate constraints found: constraint {} and {} define the same geometric parameter",
721 i + 1, j + 1
722 )));
723 }
724 }
725 }
726
727 Ok(())
728}
729
730fn constraints_are_equivalent(c1: &Constraint, c2: &Constraint) -> bool {
732 match (c1, c2) {
733 (
734 Constraint::Bond {
735 atoms: (a1, b1), ..
736 },
737 Constraint::Bond {
738 atoms: (a2, b2), ..
739 },
740 ) => (a1 == a2 && b1 == b2) || (a1 == b2 && b1 == a2),
741 (
742 Constraint::Angle {
743 atoms: (a1, b1, c1),
744 ..
745 },
746 Constraint::Angle {
747 atoms: (a2, b2, c2),
748 ..
749 },
750 ) => (a1 == a2 && b1 == b2 && c1 == c2) || (a1 == c2 && b1 == b2 && c1 == a2),
751 (
752 Constraint::Dihedral {
753 atoms: (a1, b1, c1, d1),
754 ..
755 },
756 Constraint::Dihedral {
757 atoms: (a2, b2, c2, d2),
758 ..
759 },
760 ) => {
761 (a1 == a2 && b1 == b2 && c1 == c2 && d1 == d2)
762 || (a1 == d2 && b1 == c2 && c1 == b2 && d1 == a2)
763 }
764 _ => false,
765 }
766}
767
768fn parse_scan(line: &str, config: &mut Config) -> Result<()> {
769 let parts: Vec<&str> = line.split_whitespace().collect();
770 if parts.len() < 6 {
771 return Ok(());
772 }
773
774 let scan_type = match parts[1].to_lowercase().as_str() {
775 "r" if parts.len() >= 7 => {
776 let a = parts[2]
777 .parse::<usize>()
778 .map_err(|_| ParseError::Parse("Invalid atom".into()))?
779 - 1;
780 let b = parts[3]
781 .parse::<usize>()
782 .map_err(|_| ParseError::Parse("Invalid atom".into()))?
783 - 1;
784 ScanType::Bond { atoms: (a, b) }
785 }
786 "a" if parts.len() >= 8 => {
787 let a = parts[2]
788 .parse::<usize>()
789 .map_err(|_| ParseError::Parse("Invalid atom".into()))?
790 - 1;
791 let b = parts[3]
792 .parse::<usize>()
793 .map_err(|_| ParseError::Parse("Invalid atom".into()))?
794 - 1;
795 let c = parts[4]
796 .parse::<usize>()
797 .map_err(|_| ParseError::Parse("Invalid atom".into()))?
798 - 1;
799 ScanType::Angle { atoms: (a, b, c) }
800 }
801 _ => return Ok(()),
802 };
803
804 let offset = if matches!(scan_type, ScanType::Bond { .. }) {
805 4
806 } else {
807 5
808 };
809 let start = parts[offset]
810 .parse()
811 .map_err(|_| ParseError::Parse("Invalid start".into()))?;
812 let num_points = parts[offset + 1]
813 .parse()
814 .map_err(|_| ParseError::Parse("Invalid num".into()))?;
815 let step_size = parts[offset + 2]
816 .parse()
817 .map_err(|_| ParseError::Parse("Invalid step".into()))?;
818
819 config.scans.push(ScanSpec {
820 scan_type,
821 start,
822 num_points,
823 step_size,
824 });
825 Ok(())
826}
827
828fn parse_bool(value: &str) -> bool {
837 let value_lower = value.trim().to_lowercase();
838 match value_lower.as_str() {
839 "true" | "yes" | "1" => true,
840 "false" | "no" | "0" => false,
841 _ => false, }
843}
844
845pub fn parse_parameter(line: &str, config: &mut Config, fixed_atoms: &mut Vec<usize>) -> Result<()> {
848 let parts: Vec<&str> = line.splitn(2, '=').collect();
849 if parts.len() != 2 {
850 return Ok(());
851 }
852
853 let key = parts[0].trim().to_lowercase();
854 let raw_value = parts[1].trim();
855
856 let value = if let Some(comment_pos) = raw_value.find('#') {
858 raw_value[..comment_pos].trim()
859 } else {
860 raw_value
861 };
862
863 match key.as_str() {
864 "nprocs" => config.nprocs = value.parse().unwrap_or(1),
865 "mem" => config.mem = value.to_string(),
866 "charge" => config.charge = value.parse().unwrap_or(0),
867 "charge_b" => {
868 if let Ok(val) = value.parse() {
869 config.charge_b = Some(val);
870 }
871 }
872 "mult_state_a" | "mult_a" => config.mult_state_a = value.parse().unwrap_or(1),
873 "mult_state_b" | "mult_b" => config.mult_state_b = value.parse().unwrap_or(1),
874 "method" => config.method = value.to_string(),
875 "program" => {
876 config.program = match value.to_lowercase().as_str() {
877 "gaussian" => QMProgram::Gaussian,
878 "orca" => QMProgram::Orca,
879 "bagel" => QMProgram::Bagel,
880 "xtb" => QMProgram::Xtb,
881 _ => QMProgram::Gaussian,
882 };
883 }
884 "mode" => {
885 config.run_mode = match value.to_lowercase().as_str() {
886 "read" => RunMode::Read,
887 "noread" => RunMode::NoRead,
888 "stable" => RunMode::Stable,
889 "inter_read" => RunMode::InterRead,
890 _ => RunMode::Normal,
891 };
892 }
893 "td_state_a" | "td_a" => config.td_state_a = value.to_string(),
894 "td_state_b" | "td_b" => config.td_state_b = value.to_string(),
895 "state_a" => config.state_a = value.parse().unwrap_or(0),
896 "state_b" => config.state_b = value.parse().unwrap_or(0),
897 "mp2" => config.mp2 = parse_bool(value),
898 "max_steps" => config.max_steps = value.parse().unwrap_or(100),
899 "max_step_size" => {
900 config.max_step_size = value.parse().unwrap_or(0.1);
902 }
903 "max_history" => {
904 config.max_history = value.parse().unwrap_or_else(|_| {
905 eprintln!(
906 "Warning: Invalid max_history value '{}', using default (5)",
907 value
908 );
909 5
910 });
911 }
912 "fix_de" => config.fix_de = value.parse().unwrap_or(0.0),
913 "delta_e" => config.thresholds.delta_e = value.parse().unwrap_or(0.000050),
914 "rms_dis" => {
916 config.thresholds.rms_dis = value.parse().unwrap_or(0.0025);
917 }
918 "max_dis" => {
919 config.thresholds.max_dis = value.parse().unwrap_or(0.004);
920 }
921 "max_grad" => {
923 config.thresholds.max_grad = value.parse().unwrap_or(0.0007);
924 }
925 "rms_grad" => {
926 config.thresholds.rms_grad = value.parse().unwrap_or(0.0005);
927 }
928 "bagel_model" => config.bagel_model = value.to_string(),
929 "custom_interface_file" => config.custom_interface_file = value.to_string(),
930 "drive_coordinate" => config.drive_coordinate = value.to_string(),
931 "drive_start" => config.drive_start = value.parse().unwrap_or(0.0),
932 "drive_end" => config.drive_end = value.parse().unwrap_or(0.0),
933 "drive_steps" => config.drive_steps = value.parse().unwrap_or(10),
934 "drive_type" => config.drive_type = value.to_string(),
935 "use_gediis" => {
936 let lower = value.trim().to_lowercase();
937 match lower.as_str() {
938 "blend" | "sequential" => {
939 config.use_gediis = true;
940 config.gediis_variant = lower;
941 }
942 _ => config.use_gediis = parse_bool(value),
943 }
944 }
945 "use_hybrid_gediis" => config.use_hybrid_gediis = parse_bool(value),
946 "gediis_blend_mode" => {
947 let lower = value.trim().to_lowercase();
948 match lower.as_str() {
949 "fixed" | "gradient" | "sequential" | "fixed_sequential" => {
950 config.gediis_blend_mode = lower;
951 }
952 _ => {
953 eprintln!(
954 "Warning: Invalid gediis_blend_mode '{}', expected 'fixed', 'fixed_sequential', 'gradient', or 'sequential', using 'fixed'",
955 value
956 );
957 }
958 }
959 }
960 "gediis_switch_rms" => {
961 config.gediis_switch_rms = value.parse().unwrap_or_else(|_| {
962 eprintln!("Warning: Invalid gediis_switch_rms '{}', using default 0.005", value);
963 0.005
964 });
965 }
966 "gediis_switch_step" => {
967 config.gediis_switch_step = value.parse().unwrap_or_else(|_| {
968 eprintln!("Warning: Invalid gediis_switch_step '{}', using default 0.001", value);
969 0.001
970 });
971 }
972 "switch_step" => {
973 config.switch_step = value.parse().unwrap_or_else(|_| {
974 eprintln!(
975 "Warning: Invalid switch_step value '{}', using default (3)",
976 value
977 );
978 3
979 });
980 }
981 "drive_atoms" => {
982 config.drive_atoms = value
983 .split(',')
984 .filter_map(|s| s.trim().parse::<usize>().ok())
985 .map(|x| x.saturating_sub(1)) .collect();
987 }
988 "fixedatoms" => {
989 for group in value.split(',') {
990 if group.contains('-') {
991 let range: Vec<&str> = group.split('-').collect();
992 if range.len() == 2 {
993 if let (Ok(start), Ok(end)) =
994 (range[0].parse::<usize>(), range[1].parse::<usize>())
995 {
996 for i in start..=end {
997 fixed_atoms.push(i - 1);
998 }
999 }
1000 }
1001 } else if let Ok(atom) = group.parse::<usize>() {
1002 fixed_atoms.push(atom - 1);
1003 }
1004 }
1005 }
1006 "gau_comm" => {
1007 config
1008 .program_commands
1009 .insert("gaussian".to_string(), value.to_string());
1010 }
1011 "orca_comm" => {
1012 config
1013 .program_commands
1014 .insert("orca".to_string(), value.to_string());
1015 }
1016 "xtb_comm" => {
1017 config
1018 .program_commands
1019 .insert("xtb".to_string(), value.to_string());
1020 }
1021 "bagel_comm" => {
1022 config
1023 .program_commands
1024 .insert("bagel".to_string(), value.to_string());
1025 }
1026 "isoniom" => config.is_oniom = parse_bool(value),
1027 "chargeandmultforoniom1" => config.charge_and_mult_oniom1 = value.to_string(),
1028 "chargeandmultforoniom2" => config.charge_and_mult_oniom2 = value.to_string(),
1029 "basis" => config.basis_set = value.to_string(),
1030 "solvent" => config.solvent = value.to_string(),
1031 "dispersion" => config.dispersion = value.to_string(),
1032 "restart" => config.restart = parse_bool(value),
1033 "print_checkpoint" => config.print_checkpoint = parse_bool(value),
1034 "smart_history" => config.smart_history = parse_bool(value),
1035 "print_level" => config.print_level = value.parse().unwrap_or(1),
1036 "reduced_factor" => config.reduced_factor = value.parse().unwrap_or(0.5),
1037 "bfgs_rho" => config.bfgs_rho = value.parse().unwrap_or(15.0),
1038 "step_reduction_multiplier" => {
1039 config.step_reduction_multiplier = value.parse().unwrap_or_else(|_| {
1040 eprintln!("Warning: Invalid step_reduction_multiplier '{}', using default 10.0", value);
1041 10.0
1042 });
1043 }
1044 "trust_reduction_factor" => {
1045 config.trust_reduction_factor = value.parse().unwrap_or_else(|_| {
1046 eprintln!("Warning: Invalid trust_reduction_factor '{}', using default 0.5", value);
1047 0.5
1048 });
1049 }
1050 "trust_increase_factor" => {
1051 config.trust_increase_factor = value.parse().unwrap_or_else(|_| {
1052 eprintln!("Warning: Invalid trust_increase_factor '{}', using default 1.2", value);
1053 1.2
1054 });
1055 }
1056 "trust_inc_threshold" => {
1057 config.trust_inc_threshold = value.parse().unwrap_or_else(|_| {
1058 eprintln!("Warning: Invalid trust_inc_threshold '{}', using default 0.0001", value);
1059 0.0001
1060 });
1061 }
1062 "trust_dec_threshold" => {
1063 config.trust_dec_threshold = value.parse().unwrap_or_else(|_| {
1064 eprintln!("Warning: Invalid trust_dec_threshold '{}', using default 0.0001", value);
1065 0.0001
1066 });
1067 }
1068 "trust_min_radius" => {
1069 config.trust_min_radius = value.parse().unwrap_or_else(|_| {
1070 eprintln!("Warning: Invalid trust_min_radius '{}', using default 0.01", value);
1071 0.01
1072 });
1073 }
1074 "trust_max_radius" => {
1075 config.trust_max_radius = value.parse().unwrap_or_else(|_| {
1076 eprintln!("Warning: Invalid trust_max_radius '{}', using default 1.0", value);
1077 1.0
1078 });
1079 }
1080 "steepest_descent_step" => {
1081 config.steepest_descent_step = value.parse().unwrap_or_else(|_| {
1082 eprintln!("Warning: Invalid steepest_descent_step '{}', using default 0.01", value);
1083 0.01
1084 });
1085 }
1086 "use_robust_diis" => config.use_robust_diis = parse_bool(value),
1088 "gediis_variant" => config.gediis_variant = value.to_lowercase(),
1089 "gdiis_cosine_check" => config.gdiis_cosine_check = value.to_lowercase(),
1090 "gdiis_coeff_check" => config.gdiis_coeff_check = value.to_lowercase(),
1091 "n_neg" => config.n_neg = value.parse().unwrap_or(0),
1092 "gediis_sim_switch" => config.gediis_sim_switch = value.parse().unwrap_or(0.0025),
1093 "hessian" => {
1095 let lower = value.trim().to_lowercase();
1096 config.hessian_method = match lower.as_str() {
1097 "direct_psb" => HessianMethod::DirectPsb,
1098 "inverse_bfgs" => HessianMethod::InverseBfgs,
1099 "bofill" => HessianMethod::Bofill,
1100 "powell" | "sr1" => HessianMethod::Powell,
1101 "bfgs_powell_mix" | "mix" => HessianMethod::BfgsPowellMix,
1102 _ => {
1103 eprintln!(
1104 "Warning: Invalid hessian method '{}', expected 'direct_psb', 'inverse_bfgs', 'bofill', 'powell', or 'bfgs_powell_mix', using 'direct_psb'",
1105 value
1106 );
1107 HessianMethod::DirectPsb
1108 }
1109 };
1110 }
1111 _ => {}
1112 }
1113 Ok(())
1114}
1115
1116pub fn filter_tail_content(raw_content: &str) -> String {
1148 raw_content
1149 .lines()
1150 .filter_map(|line| {
1151 let trimmed = line.trim();
1152
1153 if trimmed.is_empty() {
1155 return None;
1156 }
1157
1158 if is_comment_line(line) {
1160 return None;
1161 }
1162
1163 let cleaned_line = if let Some(comment_pos) = trimmed.find('#') {
1165 trimmed[..comment_pos].trim()
1166 } else {
1167 trimmed
1168 };
1169
1170 if cleaned_line.is_empty() {
1172 None
1173 } else {
1174 Some(cleaned_line)
1175 }
1176 })
1177 .collect::<Vec<_>>()
1178 .join(" ")
1179}
1180
1181pub fn is_comment_line(line: &str) -> bool {
1205 line.trim().starts_with('#')
1206}
1207
1208pub fn validate_gaussian_keywords(content: &str) -> Result<()> {
1236 validate_gaussian_syntax(content, "tail")
1237}
1238
1239pub fn validate_gaussian_syntax(content: &str, section_name: &str) -> Result<()> {
1272 if content.is_empty() {
1273 return Ok(());
1274 }
1275
1276 let invalid_chars = ['\n', '\r', '\t'];
1278
1279 for ch in invalid_chars {
1280 if content.contains(ch) {
1281 let char_name = match ch {
1282 '\n' => "newline",
1283 '\r' => "carriage return",
1284 '\t' => "tab",
1285 _ => "unknown",
1286 };
1287
1288 let suggestion = match ch {
1289 '\n' | '\r' => "Put all keywords on a single line or separate them with spaces instead of line breaks",
1290 '\t' => "Replace tabs with single spaces between keywords",
1291 _ => "Remove this character and use spaces to separate keywords",
1292 };
1293
1294 return Err(ParseError::GaussianSyntaxError {
1295 section: section_name.to_string(),
1296 details: format!("Invalid {} character found in Gaussian keywords. Gaussian route sections must be on a single line. Suggestion: {}", char_name, suggestion),
1297 line_number: None,
1298 });
1299 }
1300 }
1301
1302 if content.contains('#') {
1304 return Err(ParseError::GaussianSyntaxError {
1305 section: section_name.to_string(),
1306 details: "Comment character '#' found in keywords. Comments are not allowed in Gaussian route sections. Suggestion: Move comments to separate lines in your OpenMECP input file, or remove them entirely from the tail section.".to_string(),
1307 line_number: None,
1308 });
1309 }
1310
1311 let problematic_chars = ['|', '&', ';', '>', '<'];
1313 for ch in problematic_chars {
1314 if content.contains(ch) {
1315 let suggestion = match ch {
1316 '|' => "Use parentheses or commas to separate options instead of pipes",
1317 '&' => "Remove ampersands - they are not valid in Gaussian keywords",
1318 ';' => "Use spaces or commas to separate keywords instead of semicolons",
1319 '>' | '<' => {
1320 "Remove redirection operators - they are not valid in Gaussian keywords"
1321 }
1322 _ => "Remove this character and check Gaussian manual for proper keyword syntax",
1323 };
1324
1325 return Err(ParseError::GaussianSyntaxError {
1326 section: section_name.to_string(),
1327 details: format!("Potentially problematic character '{}' found. This may cause Gaussian parsing errors. Suggestion: {}", ch, suggestion),
1328 line_number: None,
1329 });
1330 }
1331 }
1332
1333 let open_parens = content.chars().filter(|&c| c == '(').count();
1335 let close_parens = content.chars().filter(|&c| c == ')').count();
1336 if open_parens != close_parens {
1337 let suggestion = if open_parens > close_parens {
1338 format!("Add {} closing parenthesis ')'", open_parens - close_parens)
1339 } else {
1340 format!(
1341 "Remove {} extra closing parenthesis ')' or add matching opening parenthesis '('",
1342 close_parens - open_parens
1343 )
1344 };
1345
1346 return Err(ParseError::GaussianSyntaxError {
1347 section: section_name.to_string(),
1348 details: format!("Unbalanced parentheses in Gaussian keywords: {} opening '(', {} closing ')'. Suggestion: {}", open_parens, close_parens, suggestion),
1349 line_number: None,
1350 });
1351 }
1352
1353 if content.contains(" ") {
1355 return Err(ParseError::GaussianSyntaxError {
1356 section: section_name.to_string(),
1357 details: "Multiple consecutive spaces found between keywords. Gaussian prefers single spaces. Suggestion: Replace multiple spaces with single spaces between keywords.".to_string(),
1358 line_number: None,
1359 });
1360 }
1361
1362 if content.contains("=,") || content.contains(",=") {
1364 return Err(ParseError::GaussianSyntaxError {
1365 section: section_name.to_string(),
1366 details: "Invalid syntax with equals sign and comma combination. Suggestion: Check Gaussian manual for proper keyword=value syntax.".to_string(),
1367 line_number: None,
1368 });
1369 }
1370
1371 let trimmed = content.trim();
1373 if trimmed.starts_with(',') || trimmed.ends_with(',') {
1374 return Err(ParseError::GaussianSyntaxError {
1375 section: section_name.to_string(),
1376 details: "Keywords start or end with comma, which may cause parsing issues. Suggestion: Remove leading/trailing commas and ensure proper keyword syntax.".to_string(),
1377 line_number: None,
1378 });
1379 }
1380
1381 Ok(())
1382}
1383
1384pub fn validate_tail_section_with_context(content: &str, section_name: &str) -> Result<()> {
1417 if content.is_empty() {
1419 log_empty_tail_section_info(section_name);
1422 return Ok(());
1423 }
1424
1425 validate_tail_section(content, section_name)
1427}
1428
1429pub fn validate_tail_section_with_filtering_context(
1445 content: &str,
1446 section_name: &str,
1447) -> Result<()> {
1448 validate_tail_section_with_context(content, section_name)
1450}
1451
1452fn log_empty_tail_section_info(section_name: &str) {
1468 eprintln!(
1471 "INFO: {} section is empty after comment filtering. This is acceptable behavior.",
1472 section_name
1473 );
1474 eprintln!(
1475 " If you intended to include Gaussian keywords in {}, ensure they are not commented out.",
1476 section_name
1477 );
1478 eprintln!(
1479 " Common {} keywords: TD(NStates=5,Root=1), TD(NStates=10), etc.",
1480 section_name
1481 );
1482}
1483
1484pub fn validate_tail_section(content: &str, section_name: &str) -> Result<()> {
1516 if content.is_empty() {
1518 return Ok(());
1519 }
1520
1521 if content.to_uppercase().contains("TD") && !content.contains('(') {
1525 let suggestion_message = format!(
1526 "TD keyword found without required parentheses in {} section.\n\nProblem: TD-DFT calculations require parameters in parentheses.\n\nSuggestions:\n • Use 'TD(NStates=N)' where N is the number of excited states (e.g., TD(NStates=5))\n • For specific roots: 'TD(NStates=N,Root=M)' where M is the state number\n • Common examples:\n - TD(NStates=5,Root=1) # First excited state, calculate 5 states\n - TD(NStates=10) # Calculate 10 excited states",
1527 section_name
1528 );
1529
1530 return Err(ParseError::InvalidTailSection {
1531 section: section_name.to_string(),
1532 message: suggestion_message,
1533 line_number: None,
1534 });
1535 }
1536
1537 if content.to_uppercase().contains("ROOT") && !content.to_uppercase().contains("TD") {
1539 let suggestion_message = format!(
1540 "ROOT keyword found without TD keyword in {} section.\n\nProblem: ROOT is typically used with TD-DFT calculations to specify which excited state to optimize.\n\nSuggestions:\n • Use 'TD(NStates=N,Root=M)' for combined TD-DFT with root specification\n • Example: 'TD(NStates=5,Root=1)' to optimize the first excited state\n • If you need separate keywords: 'TD(NStates=5) Root=1'",
1541 section_name
1542 );
1543
1544 return Err(ParseError::InvalidTailSection {
1545 section: section_name.to_string(),
1546 message: suggestion_message,
1547 line_number: None,
1548 });
1549 }
1550
1551 let common_issues = [
1553 (
1554 "NSTATES",
1555 "Use 'NStates' instead of 'NSTATES' (case-sensitive)",
1556 ),
1557 (
1558 "nstates",
1559 "Use 'NStates' instead of 'nstates' (case-sensitive)",
1560 ),
1561 ("root=", "Use 'Root=' instead of 'root=' (case-sensitive)"),
1562 ("ROOT=", "Use 'Root=' instead of 'ROOT=' (case-sensitive)"),
1563 ("td(", "Use 'TD(' instead of 'td(' (case-sensitive)"),
1564 ];
1565
1566 for (wrong, suggestion) in &common_issues {
1567 if content.contains(wrong) {
1568 return Err(ParseError::InvalidTailSection {
1569 section: section_name.to_string(),
1570 message: format!(
1571 "Potential keyword formatting issue: found '{}'. Suggestion: {}",
1572 wrong, suggestion
1573 ),
1574 line_number: None,
1575 });
1576 }
1577 }
1578
1579 if content.contains("NStates") && !content.contains("NStates=") {
1581 let suggestion_message = format!(
1582 "NStates keyword found without value assignment in {} section.\n\nProblem: NStates requires a numeric value to specify how many excited states to calculate.\n\nSuggestions:\n • Use 'NStates=N' where N is the number of excited states\n • Common values: NStates=5, NStates=10, NStates=20\n • Example: 'TD(NStates=5,Root=1)' or 'TD(NStates=5) Root=1'",
1583 section_name
1584 );
1585
1586 return Err(ParseError::InvalidTailSection {
1587 section: section_name.to_string(),
1588 message: suggestion_message,
1589 line_number: None,
1590 });
1591 }
1592
1593 if content.contains("Root") && !content.contains("Root=") {
1594 let suggestion_message = format!(
1595 "Root keyword found without value assignment in {} section.\n\nProblem: Root requires a numeric value to specify which excited state to optimize.\n\nSuggestions:\n • Use 'Root=N' where N is the excited state number (starting from 1)\n • Example: 'Root=1' for first excited state, 'Root=2' for second excited state\n • Complete example: 'TD(NStates=5,Root=1)' or 'TD(NStates=5) Root=1'",
1596 section_name
1597 );
1598
1599 return Err(ParseError::InvalidTailSection {
1600 section: section_name.to_string(),
1601 message: suggestion_message,
1602 line_number: None,
1603 });
1604 }
1605
1606 validate_gaussian_keywords(content).map_err(|e| match e {
1608 ParseError::GaussianSyntaxError {
1609 details,
1610 line_number,
1611 ..
1612 } => {
1613 let user_friendly_message =
1615 create_user_friendly_tail_error(&details, section_name, content);
1616
1617 ParseError::InvalidTailSection {
1618 section: section_name.to_string(),
1619 message: user_friendly_message,
1620 line_number,
1621 }
1622 }
1623 _ => {
1624 let fallback_message = format!(
1626 "Validation failed for {} section: {}\n\n{}",
1627 section_name,
1628 e,
1629 get_tail_section_suggestions(content, section_name)
1630 );
1631
1632 ParseError::InvalidTailSection {
1633 section: section_name.to_string(),
1634 message: fallback_message,
1635 line_number: None,
1636 }
1637 }
1638 })
1639}
1640
1641fn read_geom_from_xyz(path: &Path) -> Result<(Vec<String>, Vec<f64>)> {
1642 let content = fs::read_to_string(path)?;
1643 let mut elements = Vec::new();
1644 let mut coords = Vec::new();
1645
1646 for line in content.lines() {
1647 let line = line.trim();
1648 if line.is_empty() || !line.chars().next().is_some_and(|c| c.is_alphabetic()) {
1649 continue;
1650 }
1651 let parts: Vec<&str> = line.split_whitespace().collect();
1652 if parts.len() >= 4 {
1653 elements.push(parts[0].to_string());
1654 for &coord_str in &parts[1..4] {
1655 coords.push(
1656 coord_str
1657 .parse()
1658 .map_err(|_| ParseError::Parse("Invalid coordinate in XYZ file".into()))?,
1659 );
1660 }
1661 }
1662 }
1663 Ok((elements, coords))
1664}
1665
1666fn read_geom_from_gjf(path: &Path) -> Result<(Vec<String>, Vec<f64>)> {
1667 let content = fs::read_to_string(path)?;
1668 let mut elements = Vec::new();
1669 let mut coords = Vec::new();
1670 let mut in_geom = false;
1671
1672 for line in content.lines() {
1673 let line = line.trim();
1674 if line.is_empty() {
1675 continue;
1676 }
1677 if line.contains("0 1") || line.contains("0 2") || line.contains("0 3") {
1678 in_geom = true;
1679 continue;
1680 }
1681 if in_geom && line.chars().next().is_some_and(|c| c.is_alphabetic()) {
1682 let parts: Vec<&str> = line.split_whitespace().collect();
1683 if parts.len() >= 4 {
1684 elements.push(parts[0].to_string());
1685 for &coord_str in &parts[1..4] {
1686 coords.push(
1687 coord_str.parse().map_err(|_| {
1688 ParseError::Parse("Invalid coordinate in GJF file".into())
1689 })?,
1690 );
1691 }
1692 }
1693 } else if in_geom && line.chars().next().is_some_and(|c| c.is_ascii_digit()) {
1694 break;
1695 }
1696 }
1697 Ok((elements, coords))
1698}
1699
1700fn read_geom_from_log(path: &Path) -> Result<(Vec<String>, Vec<f64>)> {
1701 let content = fs::read_to_string(path)?;
1702 let mut elements = Vec::new();
1703 let mut coords = Vec::new();
1704 let mut in_geom = false;
1705
1706 for line in content.lines() {
1707 if line.contains("Input orientation") {
1708 in_geom = true;
1709 elements.clear();
1710 coords.clear();
1711 continue;
1712 } else if line.contains("Distance matrix") || line.contains("Rotational constants") {
1713 in_geom = false;
1714 continue;
1715 }
1716 if in_geom {
1717 let parts: Vec<&str> = line.split_whitespace().collect();
1718 if parts.len() >= 6 && parts[0].chars().all(|c| c.is_ascii_digit()) {
1719 elements.push(parts[1].to_string());
1721 for &coord_str in &parts[3..6] {
1722 coords.push(
1723 coord_str.parse().map_err(|_| {
1724 ParseError::Parse("Invalid coordinate in LOG file".into())
1725 })?,
1726 );
1727 }
1728 }
1729 }
1730 }
1731 Ok((elements, coords))
1732}
1733
1734fn get_tail_section_suggestions(content: &str, section_name: &str) -> String {
1748 let mut suggestions = Vec::new();
1749
1750 if content.contains('\n') {
1752 suggestions.push("Put all keywords on a single line separated by spaces".to_string());
1753 }
1754
1755 if content.contains('#') {
1756 suggestions.push("Move comments to separate lines outside the tail section".to_string());
1757 }
1758
1759 if content.to_uppercase().contains("TD") && !content.contains('(') {
1760 suggestions.push("Add parentheses to TD keyword: TD(NStates=5,Root=1)".to_string());
1761 }
1762
1763 if content.contains(" ") {
1764 suggestions.push("Replace multiple spaces with single spaces between keywords".to_string());
1765 }
1766
1767 if content.chars().filter(|&c| c == '(').count()
1768 != content.chars().filter(|&c| c == ')').count()
1769 {
1770 suggestions.push("Check that all parentheses are properly balanced".to_string());
1771 }
1772
1773 if suggestions.is_empty() {
1775 suggestions.push(format!("Ensure {} contains valid Gaussian keywords (e.g., 'TD(NStates=5,Root=1)' for TD-DFT calculations)", section_name));
1776 suggestions.push("Check the Gaussian manual for proper keyword syntax".to_string());
1777 suggestions.push("Remove any shell command characters or special symbols".to_string());
1778 }
1779
1780 format!(
1781 "Suggestions for fixing {} section:\n • {}",
1782 section_name,
1783 suggestions.join("\n • ")
1784 )
1785}
1786
1787fn create_user_friendly_tail_error(
1803 error_details: &str,
1804 section_name: &str,
1805 content: &str,
1806) -> String {
1807 let suggestions = get_tail_section_suggestions(content, section_name);
1808
1809 format!(
1810 "Problem in {} section: {}\n\n{}\n\nExample of valid {} content:\n TD(NStates=5,Root=1)\n or\n TD(NStates=10) Root=2",
1811 section_name,
1812 error_details,
1813 suggestions,
1814 section_name
1815 )
1816}
1817
1818fn read_external_geometry(path: &Path) -> Result<(Vec<String>, Vec<f64>)> {
1819 let path_str = path.to_string_lossy();
1820 if path_str.ends_with(".xyz") {
1821 read_geom_from_xyz(path)
1822 } else if path_str.ends_with(".gjf") {
1823 read_geom_from_gjf(path)
1824 } else if path_str.ends_with(".log") {
1825 read_geom_from_log(path)
1826 } else {
1827 Err(ParseError::Parse(
1828 "Unsupported external geometry file format".into(),
1829 ))
1830 }
1831}