Source code for octadist.src.io

# OctaDist  Copyright (C) 2019-2026  Rangsiman Ketkaew et al.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.
# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

from operator import itemgetter

import numpy as np

from octadist.src import elements, popup



[docs]
def is_cif(f):
    """
    Check if the input file is .cif file format.

    Parameters
    ----------
    f : str
        User input filename.

    Returns
    -------
    bool : bool
        If file is CIF file, return True.

    See Also
    --------
    get_coord_cif :
        Find atomic coordinates of molecule from CIF file.

    Notes
    -----
    More details about CIF file format are provided at https://en.wikipedia.org/wiki/Crystallographic_Information_File.

    Examples
    --------
    >>> # example.cif
    >>> # example
    >>> # _audit_creation_date              2012-10-26T21:09:50-0400
    >>> # _audit_creation_method            fapswitch 2.2
    >>> # _symmetry_space_group_name_H-M    P1
    >>> # _symmetry_Int_Tables_number       1
    >>> # _space_group_crystal_system       triclinic
    >>> # _cell_length_a                    16.012374
    >>> # _cell_length_b                    14.740457
    >>> # _cell_length_c                    19.436146
    >>> # _cell_angle_alpha                 89.939227
    >>> # _cell_angle_beta                  90.110039
    >>> # _cell_angle_gamma                 90.015104
    >>> # _cell_volume                      4587.49671393
    >>> #
    >>> # loop_
    >>> # _atom_site_label
    >>> # _atom_site_type_symbol
    >>> # _atom_type_description
    >>> # _atom_site_fract_x
    >>> # _atom_site_fract_y
    >>> # _atom_site_fract_z
    >>> # _atom_type_partial_charge
    >>> # C1    C     C_R   0.340882 0.499989 0.500098 0.541130
    >>> # C2    C     C_R   0.528123 0.048033 0.558069 0.232589
    >>> # C3    C     C_R   0.499931 0.902862 0.500001 -0.063750
    >>> # C4    C     C_R   0.500061 0.097137 0.500001 -0.063745
    >>> # C5    C     C_1   0.499958 0.802655 0.499991 0.266033
    >>> # ...
    >>> is_cif("example.cif")
    True

    """
    cif_file = open(f, "r")
    nline = cif_file.readlines()

    for i in range(len(nline)):
        if "loop_" in nline[i]:
            return True

    return False




[docs]
def get_coord_cif(f):
    """
    Get coordinate from .cif file.

    Parameters
    ----------
    f : str
        User input filename.

    Returns
    -------
    atom : list
        Full atomic labels of complex.
    coord : array_like
        Full atomic coordinates of complex.

    Examples
    --------
    >>> file = "example.cif"
    >>> atom, coord = get_coord_cif(file)
    >>> atom
    ['Fe', 'O', 'O', 'N', 'N', 'N', 'N']
    >>> coord
    array([[18.268051, 11.28912 ,  2.565804],
           [19.823874, 10.436314,  1.381569],
           [19.074466,  9.706294,  3.743576],
           [17.364238, 10.733354,  0.657318],
           [16.149538, 11.306661,  2.913619],
           [18.599941, 12.116308,  4.528988],
           [18.364987, 13.407634,  2.249608]])

    """
    import warnings

    warnings.filterwarnings("ignore")

    # Lazy import to avoid requiring pymatgen unless CIF parsing is used
    try:
        from pymatgen.core import Structure
    except ImportError as e:
        raise ImportError(
            "pymatgen is required to read CIF files. Install it or use a different input format."
        ) from e

    # works only with pymatgen <= v2021.3.3
    structure = Structure.from_file(f)
    atom = list(map(lambda x: elements.number_to_symbol(x), structure.atomic_numbers))
    coord = structure.cart_coords

    return atom, coord




[docs]
def is_xyz(f):
    """
    Check if the input file is .xyz file format.

    Parameters
    ----------
    f : str
        User input filename.

    Returns
    -------
    bool : bool
        If file is XYZ file, return True.

    See Also
    --------
    get_coord_xyz :
        Find atomic coordinates of molecule from XYZ file.

    Examples
    --------
    >>> # example.xyz
    >>> # 20
    >>> # Comment: From Excel file
    >>> # Fe  6.251705    9.063211    5.914842
    >>> # N   8.15961     9.066456    5.463087
    >>> # N   6.749414    10.457551   7.179682
    >>> # N   5.709997    10.492955   4.658257
    >>> # N   4.350474    9.106286    6.356091
    >>> # O   5.789096    7.796326    4.611355
    >>> # O   6.686381    7.763872    7.209699
    >>> # ...
    >>> is_xyz("example.xyz")
    True

    """
    file = open(f, "r")

    first_line = file.readline()

    # Check if the first line is integer
    try:
        int(first_line)
    except ValueError:
        return False

    if count_line(f) < 9:
        return False
    else:
        return True




[docs]
def get_coord_xyz(f):
    """
    Get coordinate from .xyz file.

    Parameters
    ----------
    f : str
        User input filename.

    Returns
    -------
    atom : list
        Full atomic labels of complex.
    coord : array_like
        Full atomic coordinates of complex.

    Examples
    --------
    >>> file = "Fe-distorted-complex.xyz"
    >>> atom, coord = get_coord_xyz(file)
    >>> atom
    ['Fe', 'O', 'O', 'N', 'N', 'N', 'N']
    >>> coord
    array([[18.268051, 11.28912 ,  2.565804],
           [19.823874, 10.436314,  1.381569],
           [19.074466,  9.706294,  3.743576],
           [17.364238, 10.733354,  0.657318],
           [16.149538, 11.306661,  2.913619],
           [18.599941, 12.116308,  4.528988],
           [18.364987, 13.407634,  2.249608]])

    """
    file = open(f, "r")
    # read file from 3rd line
    line = file.readlines()[2:]
    file.close()

    atom = []
    for l in line:
        # read atom symbol on 1st column using arbitrary whitespace splitter
        # handles spaces and tabs in XYZ files
        l_strip = l.strip()
        if not l_strip:
            continue
        lst = l_strip.split()[0]
        atom.append(lst)

    file = open(f, "r")
    coord = np.loadtxt(file, skiprows=2, usecols=[1, 2, 3])
    file.close()

    coord = np.asarray(coord, dtype=np.float64)

    return atom, coord




[docs]
def is_gaussian(f):
    """
    Check if the input file is Gaussian file format.

    Parameters
    ----------
    f : str
        User input filename.

    Returns
    -------
    bool : bool
        If file is Gaussian output file, return True.

    See Also
    --------
    get_coord_gaussian :
        Find atomic coordinates of molecule from Gaussian file.

    Examples
    --------
    >>> # gaussian.log
    >>> #                             Standard orientation:
    >>> # ---------------------------------------------------------------------
    >>> # Center     Atomic      Atomic             Coordinates (Angstroms)
    >>> # Number     Number       Type             X           Y           Z
    >>> # ---------------------------------------------------------------------
    >>> #      1         26           0        0.000163    1.364285   -0.000039
    >>> #      2          8           0        0.684192    0.084335   -1.192008
    >>> #      3          8           0       -0.683180    0.083251    1.191173
    >>> #      4          7           0        1.639959    1.353157    1.006941
    >>> #      5          7           0       -0.563377    2.891083    1.435925
    >>> # ...
    >>> is_gaussian("gaussian.log")
    True

    """
    gaussian_file = open(f, "r")
    nline = gaussian_file.readlines()

    for i in range(len(nline)):
        if "Standard orientation:" in nline[i]:
            return True

    return False




[docs]
def get_coord_gaussian(f):
    """
    Extract XYZ coordinate from Gaussian output file.

    Parameters
    ----------
    f : str
        User input filename.

    Returns
    -------
    atom : list
        Full atomic labels of complex.
    coord : array_like
        Full atomic coordinates of complex.

    Examples
    --------
    >>> file = "Gaussian-Fe-distorted-complex.out"
    >>> atom, coord = get_coord_gaussian(file)
    >>> atom
    ['Fe', 'O', 'O', 'N', 'N', 'N', 'N']
    >>> coord
    array([[18.268051, 11.28912 ,  2.565804],
           [19.823874, 10.436314,  1.381569],
           [19.074466,  9.706294,  3.743576],
           [17.364238, 10.733354,  0.657318],
           [16.149538, 11.306661,  2.913619],
           [18.599941, 12.116308,  4.528988],
           [18.364987, 13.407634,  2.249608]])

    """
    gaussian_file = open(f, "r")
    nline = gaussian_file.readlines()

    start = 0
    end = 0
    atom, coord = [], []
    for i in range(len(nline)):
        if "Standard orientation:" in nline[i]:
            start = i

    for i in range(start + 5, len(nline)):
        if "---" in nline[i]:
            end = i
            break

    for line in nline[start + 5 : end]:
        data = line.split()
        data1 = int(data[1])
        coord_x = float(data[3])
        coord_y = float(data[4])
        coord_z = float(data[5])
        data1 = elements.number_to_symbol(data1)
        atom.append(data1)
        coord.append([coord_x, coord_y, coord_z])

    gaussian_file.close()

    coord = np.asarray(coord, dtype=np.float64)

    return atom, coord




[docs]
def is_nwchem(f):
    """
    Check if the input file is NWChem file format.

    Parameters
    ----------
    f : str
        User input filename.

    Returns
    -------
    bool : bool
        If file is NWChem output file, return True.

    See Also
    --------
    get_coord_nwchem :
        Find atomic coordinates of molecule from NWChem file.

    Examples
    --------
    >>> # nwchem.out
    >>> #   ----------------------
    >>> #   Optimization converged
    >>> #   ----------------------
    >>> # ...
    >>> # ...
    >>> #  No.       Tag          Charge          X              Y              Z
    >>> # ---- ---------------- ---------- -------------- -------------- --------------
    >>> #    1 Ru(Fragment=1)      44.0000    -3.04059115    -0.08558108    -0.07699482
    >>> #    2 C(Fragment=1)        6.0000    -1.62704660     2.40971357     0.63980357
    >>> #    3 C(Fragment=1)        6.0000    -0.61467778     0.59634595     1.68841986
    >>> #    4 C(Fragment=1)        6.0000     0.31519183     1.41684566     2.30745116
    >>> #    5 C(Fragment=1)        6.0000     0.28773462     2.80126911     2.08006241
    >>> # ...
    >>> is_nwchem("nwchem.out")
    True

    """
    nwchem_file = open(f, "r")
    nline = nwchem_file.readlines()

    for i in range(len(nline)):
        if "No. of atoms" in nline[i]:
            if not int(nline[i].split()[4]):
                return False

    for j in range(len(nline)):
        if "Optimization converged" in nline[j]:
            return True

    return False




[docs]
def get_coord_nwchem(f):
    """
    Extract XYZ coordinate from NWChem output file.

    Parameters
    ----------
    f : str
        User input filename.

    Returns
    -------
    atom : list
        Full atomic labels of complex.
    coord : array_like
        Full atomic coordinates of complex.

    Examples
    --------
    >>> file = "NWChem-Fe-distorted-complex.out"
    >>> atom, coord = get_coord_nwchem(file)
    >>> atom
    ['Fe', 'O', 'O', 'N', 'N', 'N', 'N']
    >>> coord
    array([[18.268051, 11.28912 ,  2.565804],
           [19.823874, 10.436314,  1.381569],
           [19.074466,  9.706294,  3.743576],
           [17.364238, 10.733354,  0.657318],
           [16.149538, 11.306661,  2.913619],
           [18.599941, 12.116308,  4.528988],
           [18.364987, 13.407634,  2.249608]])

    """
    nwchem_file = open(f, "r")
    nline = nwchem_file.readlines()

    start = 0
    end = 0
    atom, coord = [], []
    for i in range(len(nline)):
        if "Optimization converged" in nline[i]:
            start = i

    for i in range(len(nline)):
        if "No. of atoms" in nline[i]:
            end = int(nline[i].split()[4])

    start = start + 18
    end = start + end
    # The 1st line of coordinate is at 18 lines next to 'Optimization converged'
    for line in nline[start:end]:
        dat = line.split()
        dat1 = int(float(dat[2]))
        coord_x = float(dat[3])
        coord_y = float(dat[4])
        coord_z = float(dat[5])
        dat1 = elements.number_to_symbol(dat1)
        atom.append(dat1)
        coord.append([coord_x, coord_y, coord_z])

    nwchem_file.close()

    coord = np.asarray(coord, dtype=np.float64)

    return atom, coord




[docs]
def is_orca(f):
    """
    Check if the input file is ORCA file format.

    Parameters
    ----------
    f : str
        User input filename.

    Returns
    -------
    bool : bool
        If file is ORCA output file, return True.

    See Also
    --------
    get_coord_orca :
        Find atomic coordinates of molecule from ORCA file.

    Examples
    --------
    >>> # orca.out
    >>> # ---------------------------------
    >>> # CARTESIAN COORDINATES (ANGSTROEM)
    >>> # ---------------------------------
    >>> #   C      0.009657    0.000000    0.005576
    >>> #   C      0.009657   -0.000000    1.394424
    >>> #   C      1.212436   -0.000000    2.088849
    >>> #   C      2.415214    0.000000    1.394425
    >>> #   C      2.415214   -0.000000    0.005575
    >>> # ...
    >>> is_orca("orca.out")
    True

    """
    orca_file = open(f, "r")
    nline = orca_file.readlines()
    for i in range(len(nline)):
        if "CARTESIAN COORDINATES (ANGSTROEM)" in nline[i]:
            return True

    return False




[docs]
def get_coord_orca(f):
    """
    Extract XYZ coordinate from ORCA output file.

    Parameters
    ----------
    f : str
        User input filename.

    Returns
    -------
    atom : list
        Full atomic labels of complex.
    coord : array_like
        Full atomic coordinates of complex.

    Examples
    --------
    >>> file = "ORCA-Fe-distorted-complex.out"
    >>> atom, coord = get_coord_orca(file)
    >>> atom
    ['Fe', 'O', 'O', 'N', 'N', 'N', 'N']
    >>> coord
    array([[18.268051, 11.28912 ,  2.565804],
           [19.823874, 10.436314,  1.381569],
           [19.074466,  9.706294,  3.743576],
           [17.364238, 10.733354,  0.657318],
           [16.149538, 11.306661,  2.913619],
           [18.599941, 12.116308,  4.528988],
           [18.364987, 13.407634,  2.249608]])

    """
    orca_file = open(f, "r")
    nline = orca_file.readlines()

    start = 0
    end = 0
    atom, coord = [], []
    for i in range(len(nline)):
        if "CARTESIAN COORDINATES (ANGSTROEM)" in nline[i]:
            start = i

    for i in range(start + 2, len(nline)):
        if "---" in nline[i]:
            end = i - 1
            break

    for line in nline[start + 2 : end]:
        dat = line.split()
        dat1 = dat[0]
        coord_x = float(dat[1])
        coord_y = float(dat[2])
        coord_z = float(dat[3])
        atom.append(dat1)
        coord.append([coord_x, coord_y, coord_z])

    orca_file.close()

    coord = np.asarray(coord, dtype=np.float64)

    return atom, coord




[docs]
def is_qchem(f):
    """
    Check if the input file is Q-Chem file format.

    Parameters
    ----------
    f : str
        User input filename.

    Returns
    -------
    bool : bool
        If file is Q-Chem output file, return True.

    See Also
    --------
    get_coord_qchem :
        Find atomic coordinates of molecule from Q-Chem file.

    Examples
    --------
    >>> # qchem.out
    >>> # ******************************
    >>> # **  OPTIMIZATION CONVERGED  **
    >>> # ******************************
    >>> #                            Coordinates (Angstroms)
    >>> #     ATOM                X               Y               Z
    >>> #      1  C         0.2681746845   -0.8206222796   -0.3704019386
    >>> #      2  C        -1.1809302341   -0.5901746612   -0.6772716414
    >>> #      3  H        -1.6636318262   -1.5373167851   -0.9496501352
    >>> #      4  H        -1.2829834971    0.0829227646   -1.5389938241
    >>> #      5  C        -1.9678565203    0.0191922768    0.5346693165
    >>> # ...
    >>> is_qchem("qchem.out")
    True

    """
    qchem_file = open(f, "r")
    nline = qchem_file.readlines()

    for i in range(len(nline)):
        if "OPTIMIZATION CONVERGED" in nline[i]:
            return True

    return False




[docs]
def get_coord_qchem(f):
    """
    Extract XYZ coordinate from Q-Chem output file.

    Parameters
    ----------
    f : str
        User input filename.

    Returns
    -------
    atom : list
        Full atomic labels of complex.
    coord : array_like
        Full atomic coordinates of complex.

    Examples
    --------
    >>> file = "Qchem-Fe-distorted-complex.out"
    >>> atom, coord = get_coord_qchem(file)
    >>> atom
    ['Fe', 'O', 'O', 'N', 'N', 'N', 'N']
    >>> coord
    array([[18.268051, 11.28912 ,  2.565804],
           [19.823874, 10.436314,  1.381569],
           [19.074466,  9.706294,  3.743576],
           [17.364238, 10.733354,  0.657318],
           [16.149538, 11.306661,  2.913619],
           [18.599941, 12.116308,  4.528988],
           [18.364987, 13.407634,  2.249608]])

    """
    orca_file = open(f, "r")
    nline = orca_file.readlines()

    start = 0
    end = 0
    atom, coord = [], []
    for i in range(len(nline)):
        if "OPTIMIZATION CONVERGED" in nline[i]:
            start = i

    for i in range(start + 5, len(nline)):
        if "Z-matrix Print:" in nline[i]:
            end = i - 1
            break

    for line in nline[start + 5 : end]:
        dat = line.split()
        dat1 = dat[1]
        coord_x = float(dat[2])
        coord_y = float(dat[3])
        coord_z = float(dat[4])
        atom.append(dat1)
        coord.append([coord_x, coord_y, coord_z])

    orca_file.close()

    coord = np.asarray(coord, dtype=np.float64)

    return atom, coord




[docs]
def count_line(file=None):
    """
    Count lines in an input file.

    Parameters
    ----------
    file : str
        Absolute or full path of input file.

    Returns
    -------
    i + 1 : int
        Number of line in file.

    Examples
    --------
    >>> file = "[Fe(1-bpp)2][BF4]2-HS.xyz"
    >>> count_line(file)
    27

    """
    if file is None:
        raise TypeError("count_line needs one argument: input file")

    with open(file) as f:
        for i, l in enumerate(f):
            pass

    return i + 1




[docs]
def extract_coord(file=None):
    """
    Check file type, read data, extract atomic symbols and cartesian coordinate from
    a structure input file provided by the user. This function can efficiently manupulate I/O process.
    File types currently supported are listed in notes below. Other file formats can also be implemented
    easily within this module.

    Parameters
    ----------
    file : str
        User input filename.

    Returns
    -------
    atom : list
        Full atomic labels of complex.
    coord : array_like
        Full atomic coordinates of complex.

    See Also
    --------
    octadist.main.OctaDist.open_file :
        Open file dialog and to browse input file.
    octadist.main.OctaDist.search_coord :
        Search octahedral structure in complex.

    Notes
    -----
    The following are file types supported by the current virsion of OctaDist:

    - ``CIF``
    - ``XYZ``
    - ``Gaussian``
    - ``NWChem``
    - ``ORCA``
    - ``Q-Chem``

    Examples
    --------
    >>> file = "[Fe(1-bpp)2][BF4]2-HS.xyz"
    >>> atom, coord = extract_coord(file)
    >>> atom
    ['Fe', 'N', 'N', 'N', 'N', 'N', 'N', 'C', 'C']
    >>> coord
    array([[-1.95348286e+00,  4.51770478e+00,  1.47855811e+01],
           [-1.87618286e+00,  4.48070478e+00,  1.26484811e+01],
           [-2.18698286e+00,  4.34540478e+00,  1.69060811e+01],
           [-4.88286000e-03,  3.69060478e+00,  1.42392811e+01],
           [-1.17538286e+00,  6.38340478e+00,  1.56457811e+01],
           [-2.75078286e+00,  2.50260478e+00,  1.51806811e+01],
           [-3.90128286e+00,  5.27750478e+00,  1.40814811e+01],
           [-6.14953418e+00,  8.30666180e+00,  2.91361978e+01],
           [-8.59995241e+00,  7.11630815e+00,  4.52898814e+01]])

    """
    if file is None:
        raise TypeError("extract_coord needs one argument: input file")

    atom = []
    coord = np.array([])
    is_ftype_correct = True
    is_format_correct = True
    is_coord_correct = True

    # Check file extension
    # --- CIF ---
    if file.endswith(".cif"):
        if is_cif(file):
            atom, coord = get_coord_cif(file)
        else:
            is_ftype_correct = False
            is_coord_correct = False
    # --- XYZ ---
    elif file.endswith(".xyz"):
        if is_xyz(file):
            atom, coord = get_coord_xyz(file)
        else:
            is_ftype_correct = False
            is_coord_correct = False
    # --- Other formats ---
    elif file.endswith(".out") or file.endswith(".log"):
        # Gaussian
        if is_gaussian(file):
            atom, coord = get_coord_gaussian(file)
        # NWChem
        elif is_nwchem(file):
            atom, coord = get_coord_nwchem(file)
        # ORCA
        elif is_orca(file):
            atom, coord = get_coord_orca(file)
        # Q-Chem
        elif is_qchem(file):
            atom, coord = get_coord_qchem(file)
        else:
            is_coord_correct = False
    else:
        is_format_correct = False
        is_coord_correct = False

    if not is_ftype_correct:
        popup.err_invalid_ftype()

    if not is_format_correct:
        popup.err_wrong_format()

    if is_coord_correct:
        # if atom and coord are correct, remove empty string in list
        atom = list(filter(None, atom))
        return atom, coord
    else:
        # return empty atom and coord
        return atom, coord




[docs]
def find_metal(atom=None, coord=None):
    """
    Count the number of metal center atom in complex.

    Parameters
    ----------
    atom : list or None
        Full atomic labels of complex.
        Default is None.
    coord : array_like or None
        Full atomic coordinates of complex.
        Default is None.

    Returns
    -------
    atom_metal : list
        Atomic labels of metal center atom.
    coord_metal : array_like
        Atomic coordinates of metal center atom.
    index_metal : list
        Indices of metal atoms found.

    See Also
    --------
    octadist.src.elements.check_atom :
        Convert atomic number to atomic symbol and vice versa.

    Examples
    --------
    >>> atom = ['Fe', 'N', 'N', 'N', 'N', 'N', 'N']
    >>> coord = [[-1.95348286e+00,  4.51770478e+00,  1.47855811e+01],
                 [-1.87618286e+00,  4.48070478e+00,  1.26484811e+01],
                 [-3.90128286e+00,  5.27750478e+00,  1.40814811e+01],
                 [-4.88286000e-03,  3.69060478e+00,  1.42392811e+01],
                 [-2.18698286e+00,  4.34540478e+00,  1.69060811e+01],
                 [-1.17538286e+00,  6.38340478e+00,  1.56457811e+01],
                 [-2.75078286e+00,  2.50260478e+00,  1.51806811e+01]]
    >>> atom_metal, coord_metal = find_metal(atom, coord)
    >>> atom_metal
    ['Fe']
    >>> coord_metal
    array([[-1.95348286,  4.51770478, 14.7855811 ]])

    """
    if atom is None or coord is None:
        raise TypeError("find_metal needs two arguments: atom, coord")

    atom_metal = []
    coord_metal = []
    index_metal = []

    for i in range(len(atom)):
        number = elements.number_to_symbol(atom[i])
        if (
            21 <= number <= 30
            or 39 <= number <= 48
            or 57 <= number <= 80
            or 89 <= number <= 109
        ):
            atom_metal.append(atom[i])
            coord_metal.append(coord[i])
            index_metal.append(i)

    coord_metal = np.asarray(coord_metal, dtype=np.float64)

    return atom_metal, coord_metal, index_metal




[docs]
def extract_octa(atom, coord, ref_index=0, cutoff_ref_ligand=2.8):
    """
    Search the octahedral structure in complex and return atoms and coordinates.

    Parameters
    ----------
    atom : list
        Full atomic labels of complex.
    coord : array_like
        Full atomic coordinates of complex.
    ref_index : int
        Index of the reference to be used as the center atom for neighbor atoms
        in octahedral structure of the complex. Python-based index.
        Default is 0.
    cutoff_ref_ligand : float, optional
        Cutoff distance for screening bond distance between reference and ligand atoms.
        Default is 2.8.

    Returns
    -------
    atom_octa : list
        Atomic labels of octahedral structure.
    coord_octa : array_like
        Atomic coordinates of octahedral structure.

    See Also
    --------
    find_metal :
        Find metals in complex.
    octadist.main.OctaDist.search_coord :
        Search octahedral structure in complex.

    Examples
    --------
    >>> atom = ['Fe', 'N', 'N', 'N', 'N', 'N', 'N', 'C', 'C']
    >>> coord = [[-1.95348286e+00,  4.51770478e+00,  1.47855811e+01],
                 [-1.87618286e+00,  4.48070478e+00,  1.26484811e+01],
                 [-3.90128286e+00,  5.27750478e+00,  1.40814811e+01],
                 [-4.88286000e-03,  3.69060478e+00,  1.42392811e+01],
                 [-2.18698286e+00,  4.34540478e+00,  1.69060811e+01],
                 [-1.17538286e+00,  6.38340478e+00,  1.56457811e+01],
                 [-2.75078286e+00,  2.50260478e+00,  1.51806811e+01],
                 [-6.14953418e+00,  8.30666180e+00,  2.91361978e+01],
                 [-8.59995241e+00,  7.11630815e+00,  4.52898814e+01]]
    >>> atom_octa, coord_octa = extract_octa(atom, coord)
    >>> atom_octa
    ['Fe', 'N', 'N', 'N', 'N', 'N', 'N']
    >>> coord_octa
    array([[-1.95348286e+00,  4.51770478e+00,  1.47855811e+01],
           [-1.87618286e+00,  4.48070478e+00,  1.26484811e+01],
           [-2.18698286e+00,  4.34540478e+00,  1.69060811e+01],
           [-4.88286000e-03,  3.69060478e+00,  1.42392811e+01],
           [-1.17538286e+00,  6.38340478e+00,  1.56457811e+01],
           [-2.75078286e+00,  2.50260478e+00,  1.51806811e+01],
           [-3.90128286e+00,  5.27750478e+00,  1.40814811e+01]])

    """
    if ref_index < 0:
        raise ValueError(
            "index of the reference center atom must be equal or greater than zero"
        )
    elif ref_index + 1 > len(atom):
        raise ValueError(
            "index of the reference center atom is greater than the total number of atoms in the complex."
        )

    # Lazy import to avoid requiring scipy unless octa extraction is used
    from scipy.spatial import distance

    dist_list = []
    for i in range(len(list(atom))):
        dist = distance.euclidean(coord[ref_index], coord[i])
        if dist <= cutoff_ref_ligand:
            dist_list.append([atom[i], coord[i], dist])

    # sort list of tuples by distance in ascending order
    dist_list.sort(key=itemgetter(2))

    # Keep only first 7 atoms
    dist_list = dist_list[:7]
    atom_octa, coord_octa, dist = zip(*dist_list)
    atom_octa = list(atom_octa)
    coord_octa = np.asarray(coord_octa, dtype=np.float64)

    return atom_octa, coord_octa