# CombineML - Copy.py
# Combines paired POSCAR_* / OUTCAR_* files into a single ML_ABN file.
import glob
import re
def read_poscar(filename):
    """Parse a POSCAR file that carries an element-symbol line (VASP 5 layout).

    Args:
        filename: Path of the POSCAR file to read.

    Returns:
        A 7-tuple ``(comment, scale, cell_vectors, atom_types, num_atoms,
        coordinate_type, atomic_positions)`` where ``cell_vectors`` is a list
        of three float rows, ``atom_types`` the species symbols, ``num_atoms``
        the per-species counts, ``coordinate_type`` the coordinate-mode line
        as written in the file, and ``atomic_positions`` one ``[x, y, z]``
        row per atom.

    Raises:
        ValueError: If the file is too short, a position row has fewer than
            three columns, or fewer rows than ``sum(num_atoms)`` are present.
    """
    with open(filename, "r") as file:
        lines = file.readlines()

    if len(lines) < 8:
        raise ValueError(
            f"{filename}: too short to be a POSCAR file ({len(lines)} lines)."
        )

    # Fixed-position header: comment, scale, three lattice rows, symbols, counts.
    comment = lines[0].strip()
    scale = float(lines[1].strip())
    cell_vectors = [list(map(float, line.strip().split())) for line in lines[2:5]]
    atom_types = lines[5].strip().split()
    num_atoms = list(map(int, lines[6].strip().split()))

    # An optional "Selective dynamics" line (recognised by its first letter)
    # pushes the coordinate-mode line down by one.
    mode_index = 7
    if lines[mode_index].strip()[:1] in ("S", "s"):
        mode_index += 1
        if mode_index >= len(lines):
            raise ValueError(f"{filename}: missing coordinate-mode line.")
    coordinate_type = lines[mode_index].strip()

    # Read exactly one row per declared atom.  Stopping there keeps trailing
    # blank lines and any velocity block out of the position list, and taking
    # only three columns drops selective-dynamics flags or trailing labels.
    expected = sum(num_atoms)
    atomic_positions = []
    for line in lines[mode_index + 1:]:
        if len(atomic_positions) == expected:
            break
        fields = line.split()
        if not fields:
            break  # a blank line ends the position block
        if len(fields) < 3:
            raise ValueError(f"{filename}: malformed position row {line!r}.")
        atomic_positions.append([float(value) for value in fields[:3]])

    if len(atomic_positions) != expected:
        raise ValueError(
            f"{filename}: expected {expected} atomic positions, "
            f"found {len(atomic_positions)}."
        )

    return comment, scale, cell_vectors, atom_types, num_atoms, coordinate_type, atomic_positions
def read_outcar(filename):
    """Extract forces, energy and stress from an OUTCAR file.

    The first ``POSITION ... TOTAL-FORCE (eV/Angst)`` table in the file is
    parsed, the first ``in kB`` line supplies the stress, and the energy is
    the ``energy(sigma->0)`` value printed after that force table.  Taking the
    first ``energy(sigma->0)`` in the file instead would pick up an early
    electronic iteration rather than the converged value.

    A short preview of the parsed forces is printed to stdout.

    Args:
        filename: Path of the OUTCAR file to read.

    Returns:
        ``(forces_data, energy_data, stress_data)``: a list of ``[fx, fy, fz]``
        rows, a float, and the list of floats found on the ``in kB`` line.

    Raises:
        ValueError: If the force table, its terminating ``total drift:`` line,
            an energy line, or the ``in kB`` line cannot be found, or a force
            row has fewer than six columns.
    """
    with open(filename, "r") as file:
        content = file.read()

    # Locate the force table by position instead of by line arithmetic.
    header = re.search(r"POSITION\s+TOTAL-FORCE \(eV/Angst\)", content)
    if header is None:
        raise ValueError(f"{filename}: no POSITION/TOTAL-FORCE table found.")
    table_end = content.find("total drift:", header.end())
    if table_end == -1:
        raise ValueError(f"{filename}: force table is not terminated by 'total drift:'.")

    forces_data = []
    # [1:] drops the remainder of the header line itself.
    for line in content[header.end():table_end].split("\n")[1:]:
        stripped = line.strip()
        # Skip blank lines and the dashed rules that frame the table.
        if not stripped or set(stripped) == {"-"}:
            continue
        fields = stripped.split()
        if len(fields) < 6:
            raise ValueError(f"{filename}: malformed force row {line!r}.")
        forces_data.append(list(map(float, fields[3:6])))  # columns 3-5: Fx, Fy, Fz

    print(f"Forces in file {filename}:")
    print(forces_data[:2])  # First two lines of forces
    print(forces_data[-2:])  # Last two lines of forces
    print("------")

    # Prefer the energy reported after the force table; if the file stops
    # before that, use the last value printed ahead of the table.
    energy_pattern = re.compile(r"energy\(sigma->0\) =(.*)")
    energy_match = energy_pattern.search(content, table_end)
    if energy_match is None:
        earlier = list(energy_pattern.finditer(content, 0, table_end))
        energy_match = earlier[-1] if earlier else None
    if energy_match is None:
        raise ValueError(f"{filename}: no 'energy(sigma->0)' line found.")
    energy_data = float(energy_match.group(1).strip())

    stress_match = re.search(r"in kB(.*?)\n", content)
    if stress_match is None:
        raise ValueError(f"{filename}: no 'in kB' stress line found.")
    stress_data = list(map(float, stress_match.group(1).strip().split()))

    return forces_data, energy_data, stress_data
# Placeholder function to form ML_ABN file lines
def _species_counts(config):
    """Return ``{symbol: atom count}`` for one configuration, in first-seen order.

    ``elements_data`` is read as one symbol per species when ``num_atoms`` is a
    list of the same length (the counts then come from ``num_atoms``), and as
    one symbol per atom otherwise.  The two readings agree whenever both apply.
    """
    symbols = config['elements_data']
    per_species = config.get('num_atoms')
    counts = {}
    if isinstance(per_species, (list, tuple)) and len(per_species) == len(symbols):
        for symbol, number in zip(symbols, per_species):
            counts[symbol] = counts.get(symbol, 0) + number
    else:
        for symbol in symbols:
            counts[symbol] = counts.get(symbol, 0) + 1
    return counts


def form_ml_abn_file(header_data, config_data):
    """Build the text of the ML_ABN file.

    Args:
        header_data: Mapping with the keys ``num_configs``,
            ``max_num_atomtypes``, ``atomtypes`` (iterable of symbols) and
            ``max_num_atoms``.
        config_data: Iterable of per-configuration mappings with the keys
            ``elements_data``, ``cell_vectors``, ``positional_data``,
            ``energy_data``, ``force_data`` and ``stress_data``; ``num_atoms``
            is used when present (see ``_species_counts``).

    Returns:
        The complete file content as a single string.
    """
    # Formatting the header
    version_text = " 1.0 Version\n"
    num_configs_text = f" The number of configurations\n{header_data['num_configs']}\n"
    max_num_atomtypes_text = f" The maximum number of atom type\n{header_data['max_num_atomtypes']}\n"
    atomtypes_text = f" The atom types in the data file\n{' '.join(header_data['atomtypes'])}\n"
    max_num_atoms_text = f" The maximum number of atoms per system\n{header_data['max_num_atoms']}\n"

    # Formatting configs
    config_texts = []
    for i, config in enumerate(config_data, start=1):  # Numbering starts from 1
        # A dict keeps the species in first-seen order, so the output is the
        # same on every run (iterating a set of strings is not).
        counts = _species_counts(config)

        config_texts.append(f" Configuration num. {i}")
        config_texts.append(' System name\nSystem')  # Not sure what the System Name should be
        config_texts.append(f" The number of atom types\n{len(counts)}")
        config_texts.append(f" The number of atoms\n{sum(counts.values())}")
        config_texts.append(' Atom types and atom numbers')
        for element, number in counts.items():
            config_texts.append(f" {element} {number}")  # Number of each type of atom
        # CTIFOR is optional and is deliberately not written.
        config_texts.append(" Primitive lattice vectors (ang.)\n" + "\n".join(" ".join(map(str, line)) for line in config['cell_vectors']))
        config_texts.append(" Atomic positions (ang.)\n" + "\n".join(" ".join(map(str, line)) for line in config['positional_data']))
        config_texts.append(f" Total energy (eV)\n{config['energy_data']}")
        config_texts.append(" Forces (eV ang.^-1)")
        for force in config['force_data']:
            config_texts.append(f" {' '.join(map(str, force))}")
        # The stress values go on two lines: first three, then the rest.
        config_texts.append(f" Stress (kbar)\n{' '.join(map(str, config['stress_data'][:3]))}")
        config_texts.append(f"{' '.join(map(str, config['stress_data'][3:]))}")

    # Joining the header and formatted configs
    ml_abn_content = "\n".join([version_text, num_configs_text, max_num_atomtypes_text, max_num_atoms_text, atomtypes_text] + config_texts)
    return ml_abn_content
def _natural_key(name):
    """Sort key that orders embedded integers numerically (``_2`` before ``_10``)."""
    return [int(part) if part.isdigit() else part for part in re.split(r"(\d+)", name)]


def _lattice_scale(scale, cell_vectors):
    """Return the linear factor implied by a POSCAR scale value.

    A negative scale is read as the target cell volume, per the POSCAR
    convention; the factor is then the cube root of the volume ratio.
    """
    if scale >= 0:
        return scale
    a, b, c = (row[:3] for row in cell_vectors)
    volume = abs(
        a[0] * (b[1] * c[2] - b[2] * c[1])
        - a[1] * (b[0] * c[2] - b[2] * c[0])
        + a[2] * (b[0] * c[1] - b[1] * c[0])
    )
    return (abs(scale) / volume) ** (1.0 / 3.0)


def _to_angstrom(scale, cell_vectors, coordinate_type, positions, atom_total):
    """Return ``(cell, cartesian_positions)`` with the scale factor applied.

    Rows with fewer than three values are dropped and at most ``atom_total``
    rows are kept, so stray blank or velocity rows do not become atoms.
    """
    factor = _lattice_scale(scale, cell_vectors)
    cell = [[factor * value for value in row[:3]] for row in cell_vectors]
    rows = [row[:3] for row in positions if len(row) >= 3][:atom_total]
    # A coordinate-mode line starting with C/c/K/k means Cartesian;
    # anything else is treated as Direct (fractional).
    if coordinate_type[:1] in ("C", "c", "K", "k"):
        cartesian = [[factor * value for value in row] for row in rows]
    else:
        cartesian = [
            [sum(row[k] * cell[k][axis] for k in range(3)) for axis in range(3)]
            for row in rows
        ]
    return cell, cartesian


def main():
    """Combine every POSCAR_*/OUTCAR_* pair in the working directory into ML_ABN.

    Files are paired by position after a natural sort of each list.  For every
    pair the lattice and positions are converted to Cartesian angstrom, and
    the result is written to a file named ``ML_ABN``.

    Raises:
        ValueError: If the numbers of POSCAR and OUTCAR files differ, or a
            pair disagrees on the number of atoms.
    """
    # Natural sort keeps configuration 10 after configuration 9.
    poscar_files = sorted(glob.glob("POSCAR_*"), key=_natural_key)
    outcar_files = sorted(glob.glob("OUTCAR_*"), key=_natural_key)

    # Raised explicitly so the check survives ``python -O``.
    if len(poscar_files) != len(outcar_files):
        raise ValueError("Mismatch in number of POSCAR and OUTCAR files.")

    header_data = {}
    config_data = []

    # Start at 0 so an empty directory yields a finite header value.
    max_num_atoms = 0
    max_num_atomtypes = 0
    atomtypes = {}  # dict used as an insertion-ordered set

    for poscar_file, outcar_file in zip(poscar_files, outcar_files):
        (comment, scale, cell_vectors, atom_types, num_atoms,
         coordinate_type, raw_positions) = read_poscar(poscar_file)
        force_data, energy_data, stress_data = read_outcar(outcar_file)

        atom_total = sum(num_atoms)
        cell, positional_data = _to_angstrom(
            scale, cell_vectors, coordinate_type, raw_positions, atom_total
        )
        if len(positional_data) != len(force_data):
            raise ValueError(
                f"{poscar_file} has {len(positional_data)} atoms but "
                f"{outcar_file} has {len(force_data)} force rows."
            )

        # One symbol per atom, in POSCAR order.
        elements_data = [
            symbol
            for symbol, count in zip(atom_types, num_atoms)
            for _ in range(count)
        ]

        # The atom maximum is the total atom count, not the species count.
        max_num_atoms = max(max_num_atoms, atom_total)
        max_num_atomtypes = max(max_num_atomtypes, len(set(atom_types)))
        atomtypes.update(dict.fromkeys(atom_types))

        config_data.append({
            'comment': comment,
            # 'scale' and 'coordinate_type' are kept as read from the file;
            # 'cell_vectors' and 'positional_data' already have them applied.
            'scale': scale,
            'cell_vectors': cell,
            'elements_data': elements_data,
            'num_atoms': num_atoms,
            'coordinate_type': coordinate_type,
            'positional_data': positional_data,
            'force_data': force_data,
            'energy_data': energy_data,
            'stress_data': stress_data,
        })

    header_data['num_configs'] = len(poscar_files)
    header_data['max_num_atoms'] = max_num_atoms
    header_data['max_num_atomtypes'] = max_num_atomtypes
    header_data['atomtypes'] = list(atomtypes)

    ml_abn_content = form_ml_abn_file(header_data, config_data)

    with open("ML_ABN", "w") as file:
        file.write(ml_abn_content)


if __name__ == "__main__":
    main()