# OctaDist Copyright (C) 2019-2024 Rangsiman Ketkaew et al.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
from operator import itemgetter
import numpy as np
import pymatgen
from scipy.spatial import distance
from octadist.src import elements, popup
[docs]
def is_cif(f):
"""
Check if the input file is .cif file format.
Parameters
----------
f : str
User input filename.
Returns
-------
bool : bool
If file is CIF file, return True.
See Also
--------
get_coord_cif :
Find atomic coordinates of molecule from CIF file.
Notes
-----
More details about CIF file format are provided at https://en.wikipedia.org/wiki/Crystallographic_Information_File.
Examples
--------
>>> # example.cif
>>> # example
>>> # _audit_creation_date 2012-10-26T21:09:50-0400
>>> # _audit_creation_method fapswitch 2.2
>>> # _symmetry_space_group_name_H-M P1
>>> # _symmetry_Int_Tables_number 1
>>> # _space_group_crystal_system triclinic
>>> # _cell_length_a 16.012374
>>> # _cell_length_b 14.740457
>>> # _cell_length_c 19.436146
>>> # _cell_angle_alpha 89.939227
>>> # _cell_angle_beta 90.110039
>>> # _cell_angle_gamma 90.015104
>>> # _cell_volume 4587.49671393
>>> #
>>> # loop_
>>> # _atom_site_label
>>> # _atom_site_type_symbol
>>> # _atom_type_description
>>> # _atom_site_fract_x
>>> # _atom_site_fract_y
>>> # _atom_site_fract_z
>>> # _atom_type_partial_charge
>>> # C1 C C_R 0.340882 0.499989 0.500098 0.541130
>>> # C2 C C_R 0.528123 0.048033 0.558069 0.232589
>>> # C3 C C_R 0.499931 0.902862 0.500001 -0.063750
>>> # C4 C C_R 0.500061 0.097137 0.500001 -0.063745
>>> # C5 C C_1 0.499958 0.802655 0.499991 0.266033
>>> # ...
>>> is_cif("example.cif")
True
"""
cif_file = open(f, "r")
nline = cif_file.readlines()
for i in range(len(nline)):
if "loop_" in nline[i]:
return True
return False
[docs]
def get_coord_cif(f):
"""
Get coordinate from .cif file.
Parameters
----------
f : str
User input filename.
Returns
-------
atom : list
Full atomic labels of complex.
coord : array_like
Full atomic coordinates of complex.
Examples
--------
>>> file = "example.cif"
>>> atom, coord = get_coord_cif(file)
>>> atom
['Fe', 'O', 'O', 'N', 'N', 'N', 'N']
>>> coord
array([[18.268051, 11.28912 , 2.565804],
[19.823874, 10.436314, 1.381569],
[19.074466, 9.706294, 3.743576],
[17.364238, 10.733354, 0.657318],
[16.149538, 11.306661, 2.913619],
[18.599941, 12.116308, 4.528988],
[18.364987, 13.407634, 2.249608]])
"""
import warnings
warnings.filterwarnings("ignore")
# works only with pymatgen <= v2021.3.3
structure = pymatgen.Structure.from_file(f)
atom = list(map(lambda x: elements.number_to_symbol(x), structure.atomic_numbers))
coord = structure.cart_coords
return atom, coord
[docs]
def is_xyz(f):
"""
Check if the input file is .xyz file format.
Parameters
----------
f : str
User input filename.
Returns
-------
bool : bool
If file is XYZ file, return True.
See Also
--------
get_coord_xyz :
Find atomic coordinates of molecule from XYZ file.
Examples
--------
>>> # example.xyz
>>> # 20
>>> # Comment: From Excel file
>>> # Fe 6.251705 9.063211 5.914842
>>> # N 8.15961 9.066456 5.463087
>>> # N 6.749414 10.457551 7.179682
>>> # N 5.709997 10.492955 4.658257
>>> # N 4.350474 9.106286 6.356091
>>> # O 5.789096 7.796326 4.611355
>>> # O 6.686381 7.763872 7.209699
>>> # ...
>>> is_xyz("example.xyz")
True
"""
file = open(f, "r")
first_line = file.readline()
# Check if the first line is integer
try:
int(first_line)
except ValueError:
return False
if count_line(f) < 9:
return False
else:
return True
[docs]
def get_coord_xyz(f):
"""
Get coordinate from .xyz file.
Parameters
----------
f : str
User input filename.
Returns
-------
atom : list
Full atomic labels of complex.
coord : array_like
Full atomic coordinates of complex.
Examples
--------
>>> file = "Fe-distorted-complex.xyz"
>>> atom, coord = get_coord_xyz(file)
>>> atom
['Fe', 'O', 'O', 'N', 'N', 'N', 'N']
>>> coord
array([[18.268051, 11.28912 , 2.565804],
[19.823874, 10.436314, 1.381569],
[19.074466, 9.706294, 3.743576],
[17.364238, 10.733354, 0.657318],
[16.149538, 11.306661, 2.913619],
[18.599941, 12.116308, 4.528988],
[18.364987, 13.407634, 2.249608]])
"""
file = open(f, "r")
# read file from 3rd line
line = file.readlines()[2:]
file.close()
atom = []
for l in line:
# read atom on 1st column and insert to list
l_strip = l.strip()
lst = l_strip.split(" ")[0]
atom.append(lst)
file = open(f, "r")
coord = np.loadtxt(file, skiprows=2, usecols=[1, 2, 3])
file.close()
coord = np.asarray(coord, dtype=np.float64)
return atom, coord
[docs]
def is_gaussian(f):
"""
Check if the input file is Gaussian file format.
Parameters
----------
f : str
User input filename.
Returns
-------
bool : bool
If file is Gaussian output file, return True.
See Also
--------
get_coord_gaussian :
Find atomic coordinates of molecule from Gaussian file.
Examples
--------
>>> # gaussian.log
>>> # Standard orientation:
>>> # ---------------------------------------------------------------------
>>> # Center Atomic Atomic Coordinates (Angstroms)
>>> # Number Number Type X Y Z
>>> # ---------------------------------------------------------------------
>>> # 1 26 0 0.000163 1.364285 -0.000039
>>> # 2 8 0 0.684192 0.084335 -1.192008
>>> # 3 8 0 -0.683180 0.083251 1.191173
>>> # 4 7 0 1.639959 1.353157 1.006941
>>> # 5 7 0 -0.563377 2.891083 1.435925
>>> # ...
>>> is_gaussian("gaussian.log")
True
"""
gaussian_file = open(f, "r")
nline = gaussian_file.readlines()
for i in range(len(nline)):
if "Standard orientation:" in nline[i]:
return True
return False
[docs]
def get_coord_gaussian(f):
"""
Extract XYZ coordinate from Gaussian output file.
Parameters
----------
f : str
User input filename.
Returns
-------
atom : list
Full atomic labels of complex.
coord : array_like
Full atomic coordinates of complex.
Examples
--------
>>> file = "Gaussian-Fe-distorted-complex.out"
>>> atom, coord = get_coord_gaussian(file)
>>> atom
['Fe', 'O', 'O', 'N', 'N', 'N', 'N']
>>> coord
array([[18.268051, 11.28912 , 2.565804],
[19.823874, 10.436314, 1.381569],
[19.074466, 9.706294, 3.743576],
[17.364238, 10.733354, 0.657318],
[16.149538, 11.306661, 2.913619],
[18.599941, 12.116308, 4.528988],
[18.364987, 13.407634, 2.249608]])
"""
gaussian_file = open(f, "r")
nline = gaussian_file.readlines()
start = 0
end = 0
atom, coord = [], []
for i in range(len(nline)):
if "Standard orientation:" in nline[i]:
start = i
for i in range(start + 5, len(nline)):
if "---" in nline[i]:
end = i
break
for line in nline[start + 5 : end]:
data = line.split()
data1 = int(data[1])
coord_x = float(data[3])
coord_y = float(data[4])
coord_z = float(data[5])
data1 = elements.number_to_symbol(data1)
atom.append(data1)
coord.append([coord_x, coord_y, coord_z])
gaussian_file.close()
coord = np.asarray(coord, dtype=np.float64)
return atom, coord
[docs]
def is_nwchem(f):
"""
Check if the input file is NWChem file format.
Parameters
----------
f : str
User input filename.
Returns
-------
bool : bool
If file is NWChem output file, return True.
See Also
--------
get_coord_nwchem :
Find atomic coordinates of molecule from NWChem file.
Examples
--------
>>> # nwchem.out
>>> # ----------------------
>>> # Optimization converged
>>> # ----------------------
>>> # ...
>>> # ...
>>> # No. Tag Charge X Y Z
>>> # ---- ---------------- ---------- -------------- -------------- --------------
>>> # 1 Ru(Fragment=1) 44.0000 -3.04059115 -0.08558108 -0.07699482
>>> # 2 C(Fragment=1) 6.0000 -1.62704660 2.40971357 0.63980357
>>> # 3 C(Fragment=1) 6.0000 -0.61467778 0.59634595 1.68841986
>>> # 4 C(Fragment=1) 6.0000 0.31519183 1.41684566 2.30745116
>>> # 5 C(Fragment=1) 6.0000 0.28773462 2.80126911 2.08006241
>>> # ...
>>> is_nwchem("nwchem.out")
True
"""
nwchem_file = open(f, "r")
nline = nwchem_file.readlines()
for i in range(len(nline)):
if "No. of atoms" in nline[i]:
if not int(nline[i].split()[4]):
return False
for j in range(len(nline)):
if "Optimization converged" in nline[j]:
return True
return False
[docs]
def get_coord_nwchem(f):
"""
Extract XYZ coordinate from NWChem output file.
Parameters
----------
f : str
User input filename.
Returns
-------
atom : list
Full atomic labels of complex.
coord : array_like
Full atomic coordinates of complex.
Examples
--------
>>> file = "NWChem-Fe-distorted-complex.out"
>>> atom, coord = get_coord_nwchem(file)
>>> atom
['Fe', 'O', 'O', 'N', 'N', 'N', 'N']
>>> coord
array([[18.268051, 11.28912 , 2.565804],
[19.823874, 10.436314, 1.381569],
[19.074466, 9.706294, 3.743576],
[17.364238, 10.733354, 0.657318],
[16.149538, 11.306661, 2.913619],
[18.599941, 12.116308, 4.528988],
[18.364987, 13.407634, 2.249608]])
"""
nwchem_file = open(f, "r")
nline = nwchem_file.readlines()
start = 0
end = 0
atom, coord = [], []
for i in range(len(nline)):
if "Optimization converged" in nline[i]:
start = i
for i in range(len(nline)):
if "No. of atoms" in nline[i]:
end = int(nline[i].split()[4])
start = start + 18
end = start + end
# The 1st line of coordinate is at 18 lines next to 'Optimization converged'
for line in nline[start:end]:
dat = line.split()
dat1 = int(float(dat[2]))
coord_x = float(dat[3])
coord_y = float(dat[4])
coord_z = float(dat[5])
dat1 = elements.number_to_symbol(dat1)
atom.append(dat1)
coord.append([coord_x, coord_y, coord_z])
nwchem_file.close()
coord = np.asarray(coord, dtype=np.float64)
return atom, coord
[docs]
def is_orca(f):
"""
Check if the input file is ORCA file format.
Parameters
----------
f : str
User input filename.
Returns
-------
bool : bool
If file is ORCA output file, return True.
See Also
--------
get_coord_orca :
Find atomic coordinates of molecule from ORCA file.
Examples
--------
>>> # orca.out
>>> # ---------------------------------
>>> # CARTESIAN COORDINATES (ANGSTROEM)
>>> # ---------------------------------
>>> # C 0.009657 0.000000 0.005576
>>> # C 0.009657 -0.000000 1.394424
>>> # C 1.212436 -0.000000 2.088849
>>> # C 2.415214 0.000000 1.394425
>>> # C 2.415214 -0.000000 0.005575
>>> # ...
>>> is_orca("orca.out")
True
"""
orca_file = open(f, "r")
nline = orca_file.readlines()
for i in range(len(nline)):
if "CARTESIAN COORDINATES (ANGSTROEM)" in nline[i]:
return True
return False
[docs]
def get_coord_orca(f):
"""
Extract XYZ coordinate from ORCA output file.
Parameters
----------
f : str
User input filename.
Returns
-------
atom : list
Full atomic labels of complex.
coord : array_like
Full atomic coordinates of complex.
Examples
--------
>>> file = "ORCA-Fe-distorted-complex.out"
>>> atom, coord = get_coord_orca(file)
>>> atom
['Fe', 'O', 'O', 'N', 'N', 'N', 'N']
>>> coord
array([[18.268051, 11.28912 , 2.565804],
[19.823874, 10.436314, 1.381569],
[19.074466, 9.706294, 3.743576],
[17.364238, 10.733354, 0.657318],
[16.149538, 11.306661, 2.913619],
[18.599941, 12.116308, 4.528988],
[18.364987, 13.407634, 2.249608]])
"""
orca_file = open(f, "r")
nline = orca_file.readlines()
start = 0
end = 0
atom, coord = [], []
for i in range(len(nline)):
if "CARTESIAN COORDINATES (ANGSTROEM)" in nline[i]:
start = i
for i in range(start + 2, len(nline)):
if "---" in nline[i]:
end = i - 1
break
for line in nline[start + 2 : end]:
dat = line.split()
dat1 = dat[0]
coord_x = float(dat[1])
coord_y = float(dat[2])
coord_z = float(dat[3])
atom.append(dat1)
coord.append([coord_x, coord_y, coord_z])
orca_file.close()
coord = np.asarray(coord, dtype=np.float64)
return atom, coord
[docs]
def is_qchem(f):
"""
Check if the input file is Q-Chem file format.
Parameters
----------
f : str
User input filename.
Returns
-------
bool : bool
If file is Q-Chem output file, return True.
See Also
--------
get_coord_qchem :
Find atomic coordinates of molecule from Q-Chem file.
Examples
--------
>>> # qchem.out
>>> # ******************************
>>> # ** OPTIMIZATION CONVERGED **
>>> # ******************************
>>> # Coordinates (Angstroms)
>>> # ATOM X Y Z
>>> # 1 C 0.2681746845 -0.8206222796 -0.3704019386
>>> # 2 C -1.1809302341 -0.5901746612 -0.6772716414
>>> # 3 H -1.6636318262 -1.5373167851 -0.9496501352
>>> # 4 H -1.2829834971 0.0829227646 -1.5389938241
>>> # 5 C -1.9678565203 0.0191922768 0.5346693165
>>> # ...
>>> is_qchem("qchem.out")
True
"""
qchem_file = open(f, "r")
nline = qchem_file.readlines()
for i in range(len(nline)):
if "OPTIMIZATION CONVERGED" in nline[i]:
return True
return False
[docs]
def get_coord_qchem(f):
"""
Extract XYZ coordinate from Q-Chem output file.
Parameters
----------
f : str
User input filename.
Returns
-------
atom : list
Full atomic labels of complex.
coord : array_like
Full atomic coordinates of complex.
Examples
--------
>>> file = "Qchem-Fe-distorted-complex.out"
>>> atom, coord = get_coord_qchem(file)
>>> atom
['Fe', 'O', 'O', 'N', 'N', 'N', 'N']
>>> coord
array([[18.268051, 11.28912 , 2.565804],
[19.823874, 10.436314, 1.381569],
[19.074466, 9.706294, 3.743576],
[17.364238, 10.733354, 0.657318],
[16.149538, 11.306661, 2.913619],
[18.599941, 12.116308, 4.528988],
[18.364987, 13.407634, 2.249608]])
"""
orca_file = open(f, "r")
nline = orca_file.readlines()
start = 0
end = 0
atom, coord = [], []
for i in range(len(nline)):
if "OPTIMIZATION CONVERGED" in nline[i]:
start = i
for i in range(start + 5, len(nline)):
if "Z-matrix Print:" in nline[i]:
end = i - 1
break
for line in nline[start + 5 : end]:
dat = line.split()
dat1 = dat[1]
coord_x = float(dat[2])
coord_y = float(dat[3])
coord_z = float(dat[4])
atom.append(dat1)
coord.append([coord_x, coord_y, coord_z])
orca_file.close()
coord = np.asarray(coord, dtype=np.float64)
return atom, coord
[docs]
def count_line(file=None):
"""
Count lines in an input file.
Parameters
----------
file : str
Absolute or full path of input file.
Returns
-------
i + 1 : int
Number of line in file.
Examples
--------
>>> file = "[Fe(1-bpp)2][BF4]2-HS.xyz"
>>> count_line(file)
27
"""
if file is None:
raise TypeError("count_line needs one argument: input file")
with open(file) as f:
for i, l in enumerate(f):
pass
return i + 1