import rdkit
from rdkit import Chem
from rdkit.Chem import Draw

ex_smiles_list = [
    'Cc1ccccc1', # Methylbenzene
    'CCO', # Ethanol
    'Cn1cnc2c1c(=O)n(C)c(=O)n2C' # Caffiene
]

# Chem.MolFromSmiles converts SMILES into an molecule that can be edited
# Draw.MolsToImage converts the molecule into an image
# Draw.MolsToGridImage converts a list of molecules into an image
ex_mol_list = [Chem.MolFromSmiles(smiles) for smiles in ex_smiles_list]

Draw.MolsToGridImage(ex_mol_list)

# Generation of straight-chain alkanes
from rdkit.Chem import AllChem

elem_alkane_smiles = 'C' # Methane

def generate_alkane_straight(n: int = 1):
    """
    Generate a straight-chain alkane with a specified number of carbon atoms.

    This function constructs a linear alkane based on the input number of carbons 
    and returns the corresponding RDKit molecule object. The output can be visualized 
    or further modified using RDKit functions.

    Parameters
    ----------
    n : int, optional
        Number of carbon atoms in the straight-chain alkane. Defaults to 1 (methane).

    Returns
    -------
    rdkit.Chem.Mol
        An RDKit molecule object representing the straight-chain alkane.

    Examples
    --------
    >>> mol = generate_alkane_straight(1)
    >>> Draw.MolToImage(mol)
    # methane structure

    >>> mol = generate_alkane_straight(5)
    >>> Draw.MolToImage(mol)
    # pentane structure

    >>> mol = generate_alkane_straight(20)
    >>> Draw.MolToImage(mol)
    # icosane structure

    Notes
    -----
    - The function internally uses a predefined `elem_alkane_smiles` string to construct the SMILES representation.
    - The resulting molecule is automatically sanitized by RDKit.
    """

    
    new_alkane_smiles = elem_alkane_smiles * n
    new_alkane = Chem.MolFromSmiles(new_alkane_smiles)

    print(f"------------ generated molecule {Chem.MolToSmiles(new_alkane)} with {n} carbons")
    return new_alkane

Draw.MolToImage(generate_alkane_straight(15))

------------ generated molecule CCCCCCCCCCCCCCC with 15 carbons

# Code for Draw module settings
from rdkit.Chem.Draw import IPythonConsole

IPythonConsole.drawOptions.addAtomIndices = True
IPythonConsole.drawOptions.addBondIndices = False
IPythonConsole.molSize = 300,300

# Branching alkane generation

def generate_alkane_branched(
        mol: Chem.Mol = Chem.MolFromSmiles("CCCC"),
        pos_branch: list = [],
        len_branch: list = []
    ) -> Chem.Mol:
    """
    Generate a branched alkane from a straight-chain alkane using RDKit.

    This function extends a base alkane (typically generated by 
    `generate_alkane_straight`) by attaching carbon branches at 
    user-specified positions. Each branch is defined by its starting 
    carbon atom index and length. The function ensures valence rules 
    are respected and sanitizes the final molecule before returning it.

    Parameters
    ----------
    mol : rdkit.Chem.Mol, optional
        The base alkane molecule to which branches will be added. 
        Default is butane (`CCCC`).
    pos_branch : list of int, optional
        List of indices indicating the carbon atoms where 
        branches should be attached. If empty, the function returns 
        the input molecule unchanged.
    len_branch : list of int, optional
        List of branch lengths corresponding to `pos_branch`. Each 
        integer specifies the number of carbons in the respective 
        branch.

    Returns
    -------
    rdkit.Chem.Mol
        The branched alkane molecule with new substituents added. 
        If invalid input is provided (e.g., mismatched list lengths 
        or out-of-range branch positions), the original molecule 
        is returned unchanged.

    Notes
    -----
    - Branch positions are **0-based** (changed 090925), consistent with chemical 
      notation, but internally converted to 0-based RDKit indices.
    - The function performs input validation to ensure:
        * No excessive number of branches relative to chain length.
        * Equal length of `pos_branch` and `len_branch`.
        * Valid atom indices for all branch positions.
    - Sanitization is performed at the end to ensure the molecule 
      conforms to RDKit's chemical valence rules.

    Examples
    --------
    >>> from rdkit import Chem
    >>> mol = generate_alkane_straight(6)  # straight-chain hexane
    >>> branched = generate_alkane_branched(mol, [2, 4], [2, 1])
    >>> Chem.MolToSmiles(branched)
    'CCC(CC)CC(C)C'   # branched hexane

    See Also
    --------
    generate_alkane_straight : Generate a linear (straight-chain) alkane.
    """

    branched_alkane = Chem.RWMol(mol)
    atom_count = branched_alkane.GetNumAtoms()

    # Input validation
    if len(pos_branch) > 2 * atom_count:
        print("Error: Too many branches.")
        return mol
    elif len(pos_branch) < len(len_branch):
        print("Error: More branch lengths specified than positions.")
        return mol
    elif len(pos_branch) > len(len_branch):
        print("Error: Missing branch length specification(s).")
        return mol

    # Add branches
    try:
        for pos, length in zip(pos_branch, len_branch):
            previous_atom_index = pos

            if previous_atom_index >= atom_count:
                print(f"Error: Branch position {pos} is out of range.")
                return mol

            for _ in range(length):
                new_atom_index = branched_alkane.AddAtom(Chem.Atom(6))  # Carbon atom
                branched_alkane.AddBond(previous_atom_index, new_atom_index, Chem.BondType.SINGLE)
                previous_atom_index = new_atom_index

    except Exception as e:
        print(f"Error: {e}")
        return mol
    
    # Sanitize molecule
    try:
        Chem.SanitizeMol(branched_alkane)
    except Exception as e:
        print(f"Error: {e}")
        return mol

    print(f"---BRANCH--- generated molecule {Chem.MolToSmiles(branched_alkane)} "
          f"at positions {[x for x in pos_branch]}")
    return branched_alkane


# Example usage
test_alkane_straight = generate_alkane_straight(8)
test_alkane_branched = generate_alkane_branched(test_alkane_straight, [3, 4, 6], [2, 5, 3])
Draw.MolsToGridImage(
    [test_alkane_straight, test_alkane_branched],
    subImgSize = (300, 300),
    molsPerRow = 4
)

------------ generated molecule CCCCCCCC with 8 carbons
---BRANCH--- generated molecule CCCCCC(CC(C)CCC)C(CC)CCC at positions [3, 4, 6]

# Cyclization of generated alkanes.

def generate_alkane_cyclized(
        mol: Chem.Mol = Chem.MolFromSmiles('CCCC'), 
        start: list = [], 
        end: list = []
    ) -> Chem.Mol:
    """
    Generate a cyclized alkane by forming new bonds between 
    specified pairs of carbon atoms.

    This function takes an existing alkane (or branched alkane) 
    and introduces ring closures by connecting user-specified 
    start and end atom indices. The resulting molecule is sanitized 
    to ensure valence rules are satisfied.

    Parameters
    ----------
    mol : rdkit.Chem.Mol, optional
        The input molecule to be cyclized. By default, this is 
        butane (`CCCC`). In practice, this should be a linear or 
        branched alkane generated by `generate_alkane_straight()` 
        or `generate_alkane_branched()`.
    start : list of int, optional
        List of atom indices (0-based) defining the starting 
        carbons for ring closure. If empty, no cyclization occurs.
    end : list of int, optional
        List of atom indices (0-based) corresponding to `start`, 
        defining the ending carbons for each closure.

    Returns
    -------
    rdkit.Chem.Mol
        A new molecule with the specified ring closures. 
        If invalid input is detected (e.g., mismatched list 
        lengths, invalid indices, or bond preconditions), 
        the original molecule is returned unchanged.

    Notes
    -----
    - Both `start` and `end` must have the same length, with 
      each pair representing one cyclization event.
    - RDKit internally checks that no atom bonds to itself 
      and that duplicate bonds are not created.
    - After modification, the molecule is sanitized to 
      enforce chemical validity.

    Examples
    --------
    >>> from rdkit import Chem
    >>> mol = Chem.MolFromSmiles("CCCCCC")   # hexane
    >>> cyclized = generate_alkane_cyclized(mol, [0], [5])
    >>> Chem.MolToSmiles(cyclized)
    'C1CCCCC1'   # cyclohexane
    """
    cyclized_alkane = Chem.RWMol(mol)
    atom_count = cyclized_alkane.GetNumAtoms()

    # Check if parameters are valid
    if len(start) != len(end):
        print("Error: Cyclization does not pair up correctly.")
        return mol
    elif len(start) >= atom_count:
        print("Error: too many cycles.")
        return mol
    
    try:
        for pos_start, pos_end in zip(start, end):
            # This function already self checks if we're adding a bond to itself
            # This function also checks if the bond already exists
            # It will shoot out as a pre-condition error
            cyclized_alkane.AddBond(pos_start, pos_end, Chem.BondType.SINGLE)

        # Sanitize and check for exceptions
        Chem.SanitizeMol(cyclized_alkane)

    except Exception as e:
        print(f"Error: {e}")
        return mol
    
    print(f"---CYCLE---- generated molecule {Chem.MolToSmiles(cyclized_alkane)} starting and closing at {[x for x in start]} and {[x for x in end]}")
    return cyclized_alkane


# Example
test_alkane_straight = generate_alkane_straight(6)
test_alkane_cyclized = generate_alkane_cyclized(test_alkane_straight, [0, 1], [4, 4])

Draw.MolsToGridImage(
    [test_alkane_straight, test_alkane_cyclized],
    subImgSize = (300, 300),
    molsPerRow = 4
)

------------ generated molecule CCCCCC with 6 carbons
---CYCLE---- generated molecule CC12CCC1C2 starting and closing at [0, 1] and [4, 4]

# Generate alkenes and alkynes

def generate_unsaturated(
        mol: Chem.Mol = Chem.MolFromSmiles('CCCC'), 
        pos_double: list = [], 
        pos_triple: list = []
    ) -> Chem.Mol:
    """
    Generate an unsaturated hydrocarbon by introducing double and/or triple bonds
    into an existing alkane framework.

    Parameters
    ----------
    mol : Chem.Mol, default = Chem.MolFromSmiles('CCCC')
        The input molecule, typically a straight-chain, branched, or cyclized alkane.
    pos_double : list of int, optional
        Indices of bonds to be converted from single to double bonds.
    pos_triple : list of int, optional
        Indices of bonds to be converted from single to triple bonds.

    Returns
    -------
    Chem.Mol
        A new molecule object containing the specified double and/or triple bonds.
        If an error occurs during bond transformation or sanitization, the original
        molecule is returned unchanged.

    Notes
    -----
    - The same bond cannot be assigned as both a double and a triple bond.
    - Double and triple bonds must not be placed adjacent in a way that violates
      carbon valency.
    - Within each parameter, duplicate entries or overlapping indices should be avoided.
    - The function attempts to sanitize the molecule after modifications; invalid
      configurations will raise an exception.

    Examples
    --------
    >>> from rdkit import Chem
    >>> mol = generate_alkane_straight(4)   # butane
    >>> unsaturated = generate_unsaturated(mol, [0], [])
    >>> Chem.MolToSmiles(unsaturated)
    'C=CCC'   # 1-butene
    """
    unsaturated = Chem.RWMol(mol)
    bond_count = unsaturated.GetNumBonds()

    # Check if the inputs are valid, assume each parameter itself is valid
    if len(pos_double) + len(pos_triple) > bond_count:
        print("Error: More bonds are requested to transform than the molecule itself.")
        return mol
    
    try:    
        for pos in pos_double:
            bond = unsaturated.GetBondWithIdx(pos)
            bond.SetBondType(Chem.BondType.DOUBLE)

        for pos in pos_triple:
            bond = unsaturated.GetBondWithIdx(pos)
            bond.SetBondType(Chem.BondType.TRIPLE)

        Chem.SanitizeMol(unsaturated)

    except Exception as e:
        print(f"Error: {e}")
        return mol
    
    print(f"---ADDBOND-- generated molecule {Chem.MolToSmiles(unsaturated)} with double bonds are added at {[x for x in pos_double]}; "
          f"triple bonds are added at {[x for x in pos_triple]}")
    return unsaturated


# Example
test_alkane = generate_alkane_straight(4)
test_unsaturated = generate_unsaturated(test_alkane, pos_double=[0], pos_triple=[2])

Draw.MolsToGridImage(
    [test_alkane, test_unsaturated],
    subImgSize = (300, 300),
    molsPerRow = 4
)

------------ generated molecule CCCC with 4 carbons
---ADDBOND-- generated molecule C#CC=C with double bonds are added at [0]; triple bonds are added at [2]

# Generate methylbenzene
generate_unsaturated(generate_alkane_cyclized(Chem.MolFromSmiles('CCCCCCC'), [1], [6]), [1, 3, 5], [])

---CYCLE---- generated molecule CC1CCCCC1 starting and closing at [1] and [6]
---ADDBOND-- generated molecule Cc1ccccc1 with double bonds are added at [1, 3, 5]; triple bonds are added at []

# Generate haloalkanes and alcohols

def generate_hetero(
        mol: Chem.Mol = Chem.MolFromSmiles('CCCC'), 
        pos_Cl: list = [], 
        pos_Br: list = [], 
        pos_OH: list = [], 
        pos_NH2: list = [], 
        pos_SH: list = []
    ) -> Chem.Mol:
    """
    Add halogen and heteroatom substituents (Cl, Br, OH, NH2, SH) to a hydrocarbon backbone.

    This function modifies an input molecule (typically an alkane or related hydrocarbon) 
    by substituting hydrogen atoms with specified heteroatoms at given carbon positions.
    The function supports chloroalkanes, bromoalkanes, alcohols, amines, and thiols.

    Parameters
    ----------
    mol : rdkit.Chem.Mol, optional
        Base molecule to be modified. Defaults to butane (`CCCC`).
    pos_Cl : list of int, optional
        Atom indices where chlorine atoms should be attached.
    pos_Br : list of int, optional
        Atom indices where bromine atoms should be attached.
    pos_OH : list of int, optional
        Atom indices where hydroxyl groups should be attached.
    pos_NH2 : list of int, optional
        Atom indices where amino groups should be attached.
    pos_SH : list of int, optional
        Atom indices where thiol groups should be attached.

    Returns
    -------
    rdkit.Chem.Mol
        The modified RDKit molecule with the requested substitutions applied. 
        If an error occurs (invalid positions, sanitization failure), 
        the original input molecule is returned.

    Notes
    -----
    - Atom indices are 0-based and must correspond to existing carbon atoms in `mol`.
    - The function does not currently enforce valence rules beyond RDKit's sanitization.
    - Substituents are added as single bonds to the specified carbon atoms.

    Examples
    --------
    >>> from rdkit import Chem
    >>> mol = generate_alkane_straight(4)   # butane
    >>> hetero = generate_hetero(mol, pos_Cl=[1])
    >>> Chem.MolToSmiles(hetero)
    'CC(Cl)CC'   # 2-chlorobutane
    """

    hetero = Chem.RWMol(mol)
    atom_count = hetero.GetNumAtoms()

    # Check valid inputs.
    if len(pos_Cl) + len(pos_Br) + len(pos_OH) > 2 * atom_count + 2:
        print("Error: too many substituents for the molecule.")
        return mol
    
    # Group definitions: element symbol → (atomic number, list of positions)
    substituents = {
        "Cl": (17, pos_Cl),
        "Br": (35, pos_Br),
        "OH": (8, pos_OH),
        "NH2": (7, pos_NH2),
        "SH": (16, pos_SH)
    }

    try:
        for group, (atomic_num, positions) in substituents.items():
            for pos in positions:
                if pos > atom_count:
                    return f"Error: carbon {pos} does not exist for group {group}."
                atom_index = hetero.AddAtom(Chem.Atom(atomic_num))
                hetero.AddBond(pos, atom_index, Chem.BondType.SINGLE)

        Chem.SanitizeMol(hetero)

    except Exception as e:
        print(f"Error: {e}")
        return mol

    print(f"---HETERO--- Generated molecule: {Chem.MolToSmiles(hetero)}")
    for group, positions in substituents.items():
        if positions:
            print(f"  {group} added at position(s): {positions[1]}")
    
    return hetero

# Example
test_alkane = generate_alkane_straight(5)
test_hetero = generate_hetero(test_alkane, pos_Cl=[1], pos_Br=[2], pos_OH=[3], pos_NH2=[3], pos_SH=[4])

Draw.MolsToGridImage(
    [test_alkane, test_hetero],
    subImgSize = (300, 300),
    molsPerRow = 4
)

------------ generated molecule CCCCC with 5 carbons
---HETERO--- Generated molecule: CC(Cl)C(Br)C(N)(O)CS
  Cl added at position(s): [1]
  Br added at position(s): [2]
  OH added at position(s): [3]
  NH2 added at position(s): [3]
  SH added at position(s): [4]

# Generate ethers and sulfides

def generate_ether(
        mol: Chem.Mol = Chem.MolFromSmiles('CCCCC'), 
        pos_O: list = [], 
        pos_N: list = [], 
        pos_S: list = []
    ) -> Chem.Mol:
    """
    Generate ethers, sulfides, and nitrogen-substituted analogues by replacing carbons 
    with heteroatoms in a hydrocarbon backbone.

    This function replaces specified carbon atoms in the input molecule with oxygen, 
    sulfur, or nitrogen to create ethers, sulfides, or amine analogues. Terminal carbons 
    are automatically converted to alcohols, thiols, or amines as appropriate.

    Parameters
    ----------
    mol : rdkit.Chem.Mol, optional
        The base hydrocarbon molecule to modify. Defaults to pentane (`CCCCC`).
    pos_O : list of int, optional
        Atom indices (0-based) of carbons to be replaced with oxygen.
    pos_N : list of int, optional
        Atom indices (0-based) of carbons to be replaced with nitrogen.
    pos_S : list of int, optional
        Atom indices (0-based) of carbons to be replaced with sulfur.

    Returns
    -------
    rdkit.Chem.Mol
        The modified molecule containing the requested heteroatom substitutions.
        If an invalid position or sanitization error occurs, the original molecule 
        is returned unchanged.

    Raises
    ------
    ValueError
        If the total number of requested replacements exceeds the number of atoms 
        in the molecule.
    RuntimeError
        If RDKit fails to sanitize the modified molecule.

    Notes
    -----
    - Atom indices are 0-based and must correspond to existing carbon atoms in `mol`.
    - Replacing terminal carbons automatically generates functional groups such as 
      alcohols, thiols, or amines.
    - The function ensures chemical validity by sanitizing the molecule after 
      modification.
    """

    ether = Chem.RWMol(mol)
    atom_count = ether.GetNumAtoms()

    # Check input validity
    if len(pos_O) + len(pos_S) > atom_count:
        print("Error: Number of atoms that need conversion exceeds actual atom count.")
        return mol
    
    replacements = {
        "O": (8, pos_O),
        "N": (7, pos_N),
        "S": (16, pos_S)
    }

    try:
        for group, (atomic_num, positions) in replacements.items():
            for pos in positions:
                if pos > atom_count:
                    print(f"Error: carbon {pos} does not exist for group {group}.")
                    return mol
                ether.ReplaceAtom(pos, Chem.Atom(atomic_num), updateLabel=True)

        Chem.SanitizeMol(ether)

    except Exception as e:
        return f"Error: {e}"
    
    print(f"---ETHER---- Generated molecule: {Chem.MolToSmiles(ether)}")
    for group, positions in replacements.items():
        if positions:
            print(f"  {group} added at position(s): {positions[1]}")

    return ether


# Example
test_alkane = generate_alkane_straight(5)
test_ether = generate_ether(mol=Chem.MolFromSmiles('CCCCC'), pos_O=[1], pos_N=[2], pos_S=[3])

Draw.MolsToGridImage(
    [test_alkane, test_ether],
    subImgSize = (300, 300),
    molsPerRow = 4
)

------------ generated molecule CCCCC with 5 carbons
---ETHER---- Generated molecule: CONSC
  O added at position(s): [1]
  N added at position(s): [2]
  S added at position(s): [3]

# Generate ketone

def generate_ketone(
        mol: Chem.Mol = Chem.MolFromSmiles('CCCCC'), 
        pos_O: list = [], 
        pos_N: list = [], 
        pos_S: list = []
    ) -> Chem.Mol:
    """
    Generate aldehydes, ketones, imines, and thione analogues by adding 
    double-bonded heteroatoms to a hydrocarbon backbone.

    This function attaches oxygen, nitrogen, or sulfur to specified carbon 
    atoms to form ketones, aldehydes, imines, or thiones, depending on 
    the chosen substituents and positions.

    Parameters
    ----------
    mol : rdkit.Chem.Mol, optional
        The base hydrocarbon molecule to modify. Defaults to pentane (`CCCCC`).
    pos_O : list of int, optional
        Atom indices (0-based) where oxygen should be double-bonded to form ketones or aldehydes.
    pos_N : list of int, optional
        Atom indices (0-based) where nitrogen should be double-bonded to form imines.
    pos_S : list of int, optional
        Atom indices (0-based) where sulfur should be double-bonded to form thiones.

    Returns
    -------
    rdkit.Chem.Mol
        The modified molecule with the requested double-bonded functional groups.
        If invalid positions or sanitization errors occur, the original molecule is returned.

    Raises
    ------
    ValueError
        If the total number of requested substituents exceeds the number of atoms in the molecule.
    RuntimeError
        If RDKit fails to sanitize the modified molecule.

    Notes
    -----
    - Atom indices are 0-based and must correspond to existing carbon atoms in `mol`.
    - Double bonds are added between the specified carbon and the heteroatom.
    - Empty arrays indicate no addition of that functional group.
    """

    ketone = Chem.RWMol(mol)
    atom_count = ketone.GetNumAtoms()

    # Check input validity
    if len(pos_O) + len(pos_N) + len(pos_S) > atom_count:
        print("Error: Too many atoms to add to original molecule.")
        return mol
    
    substituents = {
        "O": (8, pos_O),
        "N": (7, pos_N),
        "S": (16, pos_S)
    }

    try:
        for group, (atomic_num, position) in substituents.items():
            for pos in position:
                if pos > atom_count:
                    return f"Error: carbon {pos} does not exist for group {group}."
                
                new_atom = ketone.AddAtom(Chem.Atom(atomic_num))
                ketone.AddBond(pos, new_atom, Chem.BondType.DOUBLE) # 0-based indexing

        Chem.SanitizeMol(ketone)

    except Exception as e:
        print(f"Error: {e}")
        return mol
    
    print(f"---KETONE--- Generated molecule: {Chem.MolToSmiles(ketone)}")
    for group, position in substituents.items():
        if position:
            print(f"  {group} added at position(s): {position[1]}")

    return ketone


# Example
test_alkane = generate_alkane_straight(5)
test_ketone = generate_ketone(mol=test_alkane, pos_O=[1], pos_N=[2], pos_S=[3])

Draw.MolsToGridImage(
    [test_alkane, test_ketone],
    subImgSize = (300, 300),
    molsPerRow = 4
)

------------ generated molecule CCCCC with 5 carbons
---KETONE--- Generated molecule: CC(=O)C(=N)C(C)=S
  O added at position(s): [1]
  N added at position(s): [2]
  S added at position(s): [3]

# Generate compound functional groups

# Carboxy and ester analogues
def generate_carboxy(
        mol: Chem.Mol = Chem.MolFromSmiles('CCCCC(CC)CC'), 
        pos_carboxy: list = [], 
        pos_amide: list = [], 
        pos_halide:list = []
    ) -> Chem.Mol:
    """
    Generate carboxylic acids and their analogues - amides and acyl halides.

    This function attaches multiple functional groups to a hydrocarbon backbone 
    to form carboxylic acids (COOH), amides (CONH2), and acyl halides (COX). 
    Positions can be specified as **positive** or **negative** integers to indicate 
    the direction of the substituent.

    Parameters
    ----------
    mol : rdkit.Chem.Mol, optional
        The base hydrocarbon molecule. Defaults to a branched decane (`CCCCC(CC)CC`).
    pos_carboxy : list of int, optional
        Carbon indices for carboxylic acid addition. Uses secondary carbons by convention.
    pos_amide : list of int, optional
        Carbon indices for amide addition. Uses secondary carbons by convention.
    pos_halide : list of int, optional
        Carbon indices for acyl halide addition. Uses terminal carbons by convention.

    Returns
    -------
    rdkit.Chem.Mol
        The modified molecule containing the specified compound functional groups.
        Returns the original molecule if invalid positions are supplied or sanitization fails.

    Notes
    -----
    - Negative values in `pos_carboxy` or `pos_amide` indicate the reverse direction for 
      functional group attachment.
    - A random number generator may be used internally to validate positions.
    - Carboxy, amide, and halide groups are generated using combinations of ketone, ether, 
      and halogen functions.
    """
    carboxy = Chem.RWMol(mol)
    atom_count = carboxy.GetNumAtoms()

    # Check input validity
    if len(pos_carboxy) + len(pos_amide) + len(pos_halide) > atom_count:
        print("Error: too many positions to add than the actual molecule.")
        return mol
    
    substituents = {
        "COOH": pos_carboxy,
        "CONH2": pos_amide,
        "COX": pos_halide
    }

    try:
        for group, position in substituents.items():
            for pos in position:

                if abs(pos) > atom_count:
                    print(f"Error: carbon {pos} does not exist for group {group}.")
                    return mol
                sign = 1 if pos > 0 else -1
                
                carboxy = generate_ketone(carboxy, pos_O=[abs(pos)])
                if group == "COOH":
                    carboxy = generate_ether(carboxy, pos_O=[abs(pos)+sign])
                elif group == "CONH2":
                    carboxy = generate_ether(carboxy, pos_N=[abs(pos)+sign])
                elif group == "COX":
                    carboxy = generate_hetero(carboxy, pos_Br=[abs(pos)])

        Chem.SanitizeMol(carboxy)
    
    except Exception as e:
        print(f"Error: {e}")
        return mol
    
    print("---CARBOXY-- Compound function, see upper lines for details")
    return carboxy


# Anhydrides
def generate_anhydride(
        mol: Chem.Mol = Chem.MolFromSmiles('CCCCC'), 
        pos_O: list = []
    ) -> Chem.Mol:
    """
    Generate anhydride functional groups from a hydrocarbon backbone.

    This function creates anhydrides by adding oxygen and carbonyl groups to 
    adjacent carbons, forming the characteristic -C(=O)-O-C(=O)- motif.

    Parameters
    ----------
    mol : rdkit.Chem.Mol, optional
        Base hydrocarbon molecule. Defaults to pentane (`CCCCC`).
    pos_O : list of int, optional
        Carbon indices where the oxygen atoms will participate in anhydride formation.

    Returns
    -------
    rdkit.Chem.Mol
        The modified molecule containing anhydride functional groups.
        Returns the original molecule if an error occurs during bond addition or sanitization.

    Notes
    -----
    - The function uses previously defined `generate_ether` and `generate_ketone` functions 
      to assemble the anhydride motif.
    - Adjacent carbons are automatically considered for double bond placement during generation.
    """

    anhydride = Chem.RWMol(mol)

    try:
        for pos in pos_O:
            anhydride = generate_ether(anhydride, pos_O=[pos]) 
            anhydride = generate_ketone(anhydride, pos_O=[pos+1, pos-1])

        Chem.SanitizeMol(anhydride)

    except Exception as e:
        print(f"Error in sanitization: {e}")
        return mol
    
    print("---ANHYDR--- Compound function, see upper lines for details")
    return anhydride


# Enols and enamines
def generate_enol(
        mol: Chem.Mol = Chem.MolFromSmiles('CCCCC'), 
        pos_O: list = [], 
        pos_N: list = []
    ) -> Chem.Mol:
    """
    Generate enols and enamines by adding double-bonded oxygen or nitrogen with adjacent hydrogen.

    This function modifies a hydrocarbon backbone to form enols (C=COH) or enamines (C=CNH2).
    **Positive** and **negative** values in positions indicate the directionality of the functional group.

    Parameters
    ----------
    mol : rdkit.Chem.Mol, optional
        Base hydrocarbon molecule. Defaults to pentane (`CCCCC`).
    pos_O : list of int, optional
        Carbon indices where enol groups should be formed.
    pos_N : list of int, optional
        Carbon indices where enamine groups should be formed.

    Returns
    -------
    rdkit.Chem.Mol
        The modified molecule containing the specified enol or enamine groups.
        Returns the original molecule if invalid positions are supplied or sanitization fails.

    Notes
    -----
    - Positive and negative values in `pos_O` or `pos_N` indicate direction of bond addition.
    - The function relies on `generate_unsaturated` to create double bonds and 
      `generate_hetero` to attach OH or NH2 groups.
    """
    enol = Chem.RWMol(mol)
    atom_count = enol.GetNumAtoms()

    substituents = {
        "C=CO": pos_O,
        "C=CN": pos_N
    }

    try:
        for group, position in substituents.items():
            for pos in position:
                if pos > atom_count:
                    return f"Error: carbon {pos} does not exist for group {group}."
                sign = 0 if pos > 0 else -1

                enol = generate_unsaturated(enol, pos_double=[abs(pos)+sign])
                
                if group == "C=CO":
                    enol = generate_hetero(enol, pos_OH=[abs(pos)])
                if group == "C=CN":
                    enol = generate_hetero(enol, pos_NH2=[abs(pos)])
    
        Chem.SanitizeMol(enol)

    except Exception as e:
        print(f"Error: {e}")
        return mol
    
    print("---ENOL----- Compound function, see upper lines for details")
    return enol


# Examples
test_alkane = Chem.MolFromSmiles('CCCCC(CC)CC')
test_carboxy = generate_carboxy(mol=test_alkane, pos_carboxy=[-3], pos_amide=[5], pos_halide=[8])
test_anhydride = generate_anhydride(test_alkane, pos_O=[2])
test_enol = generate_enol(test_alkane, pos_O=[3], pos_N=[-1])

Draw.MolsToGridImage(
    [test_alkane, test_carboxy, test_anhydride, test_enol],
    subImgSize = (300, 300),
    molsPerRow = 4
)

---KETONE--- Generated molecule: CCCC(=O)C(CC)CC
  O added at position(s): [3]
  N added at position(s): []
  S added at position(s): []
---ETHER---- Generated molecule: CCOC(=O)C(CC)CC
  O added at position(s): [2]
  N added at position(s): []
  S added at position(s): []
---KETONE--- Generated molecule: CCOC(=O)C(CC)C(C)=O
  O added at position(s): [5]
  N added at position(s): []
  S added at position(s): []
---ETHER---- Generated molecule: CCOC(=O)C(CC)C(N)=O
  O added at position(s): []
  N added at position(s): [6]
  S added at position(s): []
---KETONE--- Generated molecule: CCOC(=O)C(CC=O)C(N)=O
  O added at position(s): [8]
  N added at position(s): []
  S added at position(s): []
---HETERO--- Generated molecule: CCOC(=O)C(CC(=O)Br)C(N)=O
  Cl added at position(s): []
  Br added at position(s): [8]
  OH added at position(s): []
  NH2 added at position(s): []
  SH added at position(s): []
---CARBOXY-- Compound function, see upper lines for details
---ETHER---- Generated molecule: CCOCC(CC)CC
  O added at position(s): [2]
  N added at position(s): []
  S added at position(s): []
---KETONE--- Generated molecule: CCC(CC)C(=O)OC(C)=O
  O added at position(s): [3, 1]
  N added at position(s): []
  S added at position(s): []
---ANHYDR--- Compound function, see upper lines for details
---ADDBOND-- generated molecule CCCC=C(CC)CC with double bonds are added at [3]; triple bonds are added at []
---HETERO--- Generated molecule: CCCC(O)=C(CC)CC
  Cl added at position(s): []
  Br added at position(s): []
  OH added at position(s): [3]
  NH2 added at position(s): []
  SH added at position(s): []
---ADDBOND-- generated molecule C=CCC(O)=C(CC)CC with double bonds are added at [0]; triple bonds are added at []
---HETERO--- Generated molecule: C=C(N)CC(O)=C(CC)CC
  Cl added at position(s): []
  Br added at position(s): []
  OH added at position(s): []
  NH2 added at position(s): [1]
  SH added at position(s): []
---ENOL----- Compound function, see upper lines for details

# Generate aromatic heterocyclic compounds
benzene = Chem.MolFromSmiles('C1C=CC=CC=1')
indene = Chem.MolFromSmiles('c1ccc2c(c1)CC=C2')

pyrrole = Chem.MolFromSmiles('N1C=CC=C1')
imidazole = Chem.MolFromSmiles('N1C=NC=C1')
furan = Chem.MolFromSmiles('O1C=CC=C1')
thiophene = Chem.MolFromSmiles('S1C=CC=C1')
pyridine = Chem.MolFromSmiles('N1C=CC=CC=1')
pyrimidine = Chem.MolFromSmiles('N1C=NC=CC=1')
indole = Chem.MolFromSmiles('C12=C(C=CN2)C=CC=C1')
purine = Chem.MolFromSmiles('c1c2c(nc[nH]2)ncn1')

aromatic_compounds = [
    pyrrole, imidazole, furan, thiophene,
    pyridine, pyrimidine, indole, purine
]

aromatic_compounds[5]

# Important biochemical compounds

# Amino acids
glycine       = Chem.MolFromSmiles('C(C(=O)O)N')
alanine       = Chem.MolFromSmiles('CC(C(=O)O)N')
valine        = Chem.MolFromSmiles('CC(C)C(C(=O)O)N')
leucine       = Chem.MolFromSmiles('CC(C)CC(C(=O)O)N')
isoleucine    = Chem.MolFromSmiles('CCC(C)C(C(=O)O)N')  # correct isomer
proline       = Chem.MolFromSmiles('C1CC(NC1)C(=O)O')
methionine    = Chem.MolFromSmiles('CSCCC(N)C(=O)O')

phenylalanine = Chem.MolFromSmiles('c1ccccc1CC(C(=O)O)N')
tyrosine      = Chem.MolFromSmiles('NC(Cc1ccc(O)cc1)C(O)=O')
tryptophan    = Chem.MolFromSmiles('c1[nH]c2ccccc2c1CC(N)C(=O)O')

serine        = Chem.MolFromSmiles('C(C(C(=O)O)N)O')
threonine     = Chem.MolFromSmiles('CC(C(C(=O)O)N)O')
cysteine      = Chem.MolFromSmiles('C(C(C(=O)O)N)S')
asparagine    = Chem.MolFromSmiles('O=C(N)CC(N)C(=O)O')
glutamine     = Chem.MolFromSmiles('O=C(N)CCC(N)C(=O)O')

lysine        = Chem.MolFromSmiles('C(CCN)CC(C(=O)O)N')
arginine      = Chem.MolFromSmiles('C(CC(C(=O)O)N)CNC(=N)N')
histidine     = Chem.MolFromSmiles('O=C(C(CC1=CNC=N1)N)O')

aspartic_acid = Chem.MolFromSmiles('C(C(C(=O)O)N)C(=O)O')
glutamic_acid = Chem.MolFromSmiles('C(CC(=O)O)C(C(=O)O)N')

amino_acids = [
    glycine, alanine, valine, leucine, isoleucine, proline, methionine, 
    phenylalanine, tyrosine, tryptophan,
    serine, threonine, cysteine, asparagine, glutamine, 
    lysine, arginine, histidine, 
    aspartic_acid, glutamic_acid 
]


# sugars
sugars = [

]

# bases
cytosine = Chem.MolFromSmiles('N1C(=O)NC=CC(N)=1')
thymine  = Chem.MolFromSmiles('O=C1NC(=O)NC=C1C')
uracil   = Chem.MolFromSmiles('O=C1C=CNC(=O)N1')
adenine  = Chem.MolFromSmiles('NC1=NC=NC2=C1N=CN2')
guanine  = Chem.MolFromSmiles('N1C(N)=NC=2NC=NC2C1=O')

bases = [cytosine, thymine, uracil, adenine, guanine]

# lipids
lipid = [
    #triglycerides and cholesterol
]

# hormones
hormones = [

]

Draw.MolsToGridImage(
    amino_acids, subImgSize = (300, 300), molsPerRow = 5
)

import random
from collections import Counter

# similar to random.choices() but has a max_count of being chosen
def limited_choices(
        population: list, 
        weights: list = [], 
        k: int = 1, 
        max_per_item: int = 1
    ) -> list:
    """
    Randomly select elements from a population with optional weighting, 
    limiting the number of times each element can be chosen.

    This function extends `random.choices()` by removing elements from 
    consideration once they reach a specified maximum number of selections (`max_per_item`).

    Parameters
    ----------
    population : list
        A list of elements to choose from.
    weights : list of float, optional
        Relative weights corresponding to each element in `population`. 
        Must be the same length as `population`. Defaults to equal weighting.
    k : int, optional
        The number of elements to select. Defaults to 1.
    max_per_item : int, optional
        Maximum number of times each element can be selected before it is 
        temporarily removed from the pool. Defaults to 1.

    Returns
    -------
    list
        A list of length ≤ `k` containing the selected elements. If all elements 
        reach `max_per_item` before `k` selections, the returned list may be shorter.

    Notes
    -----
    - This function filters out elements that have reached `max_per_item` on each iteration.
    - If no elements remain eligible before completing `k` selections, the function stops early.
    - Equivalent to repeated weighted random sampling without replacement up to `max_per_item`.
    """

    result = []
    counts = Counter()

    if weights == []:
        weights = [1] * len(population)

    for _ in range(k):
        # Filter out items that hit their limit
        filtered = [(item, w) for item, w in zip(population, weights) if counts[item] < max_per_item]

        if not filtered:
            break  # Nothing left to pick

        items, filtered_weights = zip(*filtered)
        chosen = random.choices(items, weights=filtered_weights, k=1)[0]

        result.append(chosen)
        counts[chosen] += 1

    return result

# Quality of life function
def get_molecule(mol: Chem.Mol, factor: float, region: int):
    """
    Placeholder function that returns the input molecule unchanged.

    This function currently does not modify the molecule but is designed 
    to potentially incorporate transformations based on a scaling factor 
    or a region-specific parameter in the future.

    Parameters
    ----------
    mol : rdkit.Chem.Mol
        The input RDKit molecule object.
    factor : float
        A scaling factor intended for future use in modifying molecule properties.
    region : int
        A region identifier intended for future use in conditional transformations.

    Returns
    -------
    rdkit.Chem.Mol
        The original, unmodified molecule.

    Notes
    -----
    - Currently, this function acts as a placeholder and simply returns the input molecule.
    - Future implementations may apply transformations or selections based on `factor` and `region`.
    """
    return mol

# Degree checks
def func_carbon_degree(mol: Chem.Mol):
    """
    Categorize carbon atoms in a molecule by their degree of substitution.

    This function identifies primary, secondary, tertiary, and quaternary carbons 
    while ensuring that all neighboring atoms are also carbons (i.e., not substituted 
    by heteroatoms). This makes the resulting arrays suitable for controlled molecule 
    generation.

    Parameters
    ----------
    mol : rdkit.Chem.Mol
        The input RDKit molecule object.

    Returns
    -------
    dict
        A dictionary mapping carbon degree (1-4) to lists of atom indices (0-based) 
        corresponding to primary, secondary, tertiary, and quaternary carbons:
        - `1` : primary carbons
        - `2` : secondary carbons
        - `3` : tertiary carbons
        - `4` : quaternary carbons

    Notes
    -----
    - The indices are 0-based; add 1 if using functions that expect 1-based indexing.
    - Only carbons bonded exclusively to other carbons are included.
    - The degree of a carbon is calculated as the sum of its bonded electron pairs (bond orders).
    - This ensures simpler molecule generation by avoiding heteroatom-substituted carbons.
    """


    molecule = Chem.RWMol(mol)

    carbon_degree = {
        1: [],
        2: [],
        3: [],
        4: []
    }

    for atom in molecule.GetAtoms():
        if atom.GetAtomicNum() != 6:
            continue  # Skip non-carbon atoms

        neighbors = atom.GetNeighbors()
        if all(n.GetAtomicNum() == 6 for n in neighbors):
            degree = sum(bond.GetBondTypeAsDouble() for bond in atom.GetBonds())
            if degree in carbon_degree:
                carbon_degree[degree].append(atom.GetIdx())
    
    return carbon_degree

# Branching

def rng_branching(mol: Chem.Mol) -> Chem.Mol:
    """
    Randomizes branching of a carbon backbone by selectively introducing alkyl branches.

    This function identifies eligible carbons (secondary and tertiary) within a molecule 
    and applies `generate_alkane_branched()` to introduce random branching. The degree of 
    branching is controlled by carbon type (secondary vs. tertiary) and probabilistic 
    selection rules. The function ensures valency rules are respected by limiting the 
    number of branches per atom.

    Parameters
    ----------
    mol : Chem.Mol
        The input molecule (RDKit Mol object) to be modified.

    Returns
    -------
    Chem.Mol
        A new molecule (RDKit Mol object) with randomized branches added.

    Notes
    -----
    - Only secondary and tertiary carbons are considered as branching points.
    - Branch length is randomized between 1 and 2 carbons.
    - Each carbon is limited to `(4 - carbon_degree)` additional branches, 
      ensuring valency is not exceeded.
    - Branching is skipped if no eligible carbons are found.

    See Also
    --------
    generate_alkane_branched : Function used internally to add branches.
    func_carbon_degree : Utility function for categorizing carbons by substitution degree.
    limited_choices : Helper function for controlled random selection with constraints.
    """

    branch = Chem.RWMol(mol)
    
    carbon_types = func_carbon_degree(mol)

    # Randomizing branching distribution
    # The num_branches object used to have more objects in it for primary and tertiary carbons
    # Decided to get rid of them because of too much unnecessary complexity during parameter tuning
    num_branches = {
        2: carbon_types[2],
        3: carbon_types[3]
    }

    for carbon in num_branches:

        if num_branches[carbon] == []:
            continue

        num_iteration = random.randint(1, max(min(len(num_branches[carbon]) * (4 - carbon) // 2, 4), 1)) # takes middle value of the 3
        selected_carbons = limited_choices(num_branches[carbon], k=num_iteration, max_per_item=(4-carbon))

        for atom in selected_carbons:
            length = random.randint(1, 2)
            branch = generate_alkane_branched(branch, [atom], [length])

    return branch


# Example
test_alkane = Chem.MolFromSmiles('CCCCC')
test_alkane_branched = rng_branching(test_alkane)

Draw.MolsToGridImage(
    [test_alkane, test_alkane_branched],
    subImgSize = (300, 300),
    molsPerRow = 4
)

---BRANCH--- generated molecule CCCC(C)C at positions [1]

# Cyclization

def rng_cyclization(mol: Chem.Mol) -> Chem.Mol:
    """
    Randomizes cyclization of a carbon backbone by forming rings of variable size.

    This function introduces ring structures into a given molecule by selecting 
    random start and end positions along the carbon chain and applying 
    `generate_alkane_cyclized()`. The probability of forming rings of different 
    sizes is weighted to favor chemically realistic cycles. Very short molecules 
    (fewer than 3 carbons) are not cyclized and are instead returned unmodified.

    Parameters
    ----------
    mol : Chem.Mol
        The input molecule (RDKit Mol object) to be modified.

    Returns
    -------
    Chem.Mol
        A new molecule (RDKit Mol object) with a ring introduced.

    Notes
    -----
    - Cyclization requires at least 3 carbons. Molecules with fewer carbons 
      are returned unchanged.
    - Ring sizes are limited to 3-6 carbons (or smaller if the molecule itself 
      is shorter).  
    - A weighting scheme biases the likelihood of selecting certain ring sizes:
        * 3-membered and 4-membered rings are rare.
        * 5- and 6-membered rings are strongly favored.
    - The ring start position is randomized using an offset to ensure variability 
      in ring placement.

    See Also
    --------
    generate_alkane_cyclized : Function used internally to introduce a cycle.
    rng_branching : Randomizes branching in the carbon backbone.
    """
    cycle = Chem.RWMol(mol)
    atom_count = cycle.GetNumAtoms()

    if atom_count < 3:                                   # if not long enough, go straight to branching
        return mol

    end_cycle = list(range(3, min(atom_count + 1, 7))) # range is [a, b)

    weight_map = {
        1: [15],
        2: [15, 5],
        3: [15, 5, 30],
        4: [15, 5, 30, 30]
    }

    end_cycle_weights = weight_map.get(len(end_cycle), [1] * len(end_cycle)) # get() has a default, so just in case, we'll return equal prob.
    selected_end_cycle = random.choices(end_cycle, weights=end_cycle_weights, k=1)[0]

    # The offset basically randomizes where the start and end of the cycle is while maintaining the size of the cycle.
    offset = random.randint(0, atom_count - selected_end_cycle)
    cycle = generate_alkane_cyclized(cycle, start=[offset], end=[selected_end_cycle+offset-1])

    return cycle


# Example
test_alkane = Chem.MolFromSmiles('CCCCCCCCC')
test_alkane_cyclized = rng_cyclization(test_alkane)

Draw.MolsToGridImage(
    [test_alkane, test_alkane_cyclized],
    subImgSize = (300, 300),
    molsPerRow = 4
)

---CYCLE---- generated molecule CCCC1CCCCC1 starting and closing at [3] and [8]

# Unsaturation

def rng_unsaturation(mol: Chem.Mol) -> Chem.Mol:
    """
    Randomizes the introduction of double and triple bonds into a carbon backbone.

    This function analyzes the bonding structure of the input molecule and 
    introduces unsaturation (double or triple bonds) at eligible positions. 
    It uses `generate_unsaturated()` to perform the actual bond modifications, 
    while applying chemical heuristics to avoid over-saturation and 
    unrealistic bonding patterns.

    Parameters
    ----------
    mol : Chem.Mol
        The input molecule (RDKit Mol object) to be modified.

    Returns
    -------
    Chem.Mol
        A new molecule (RDKit Mol object) with unsaturation introduced.

    Notes
    -----
    - Bonds are classified by order (`Bond.GetBondTypeAsDouble()`):
        * 1   → single
        * 1.5 → aromatic
        * 2   → double
        * 3   → triple
    - **Eligibility filtering**:
        * Only single bonds between atoms with valence < 4 are considered.
        * A stricter cutoff (valence < 3) is required for potential triple bonds.
    - **Neighboring bond exclusion**:
        * Single bonds directly adjacent to double bonds are excluded to 
          prevent unrealistic conjugation patterns in this simplified model.
    - **Randomization strategy**:
        * A small number of single bonds (at most one-quarter of eligible ones) 
          are randomly converted to double bonds.
        * With 10% probability, one eligible single bond is converted into a 
          triple bond (if allowed by valence rules).
    - This procedure balances diversity in bond structures with chemical 
      plausibility, though it does not guarantee strict chemical stability.

    See Also
    --------
    generate_unsaturated : Function used internally to introduce double and triple bonds.
    rng_cyclization : Randomizes cyclization of the carbon backbone.
    rng_branching : Randomizes branching in the carbon backbone.
    """
    bonded_alkane = Chem.RWMol(mol)
    bond_list = {
        1: [],
        1.5: [], # aromatic bonds count as 1.5 in Bond.GetBondTypeAsDouble()
        2: [],
        3: []
    }

    eligible_single_bond_list = []
    eligible_single_bond_list_triple = []

    # logging all the bonds into their respective bond types
    for bond in bonded_alkane.GetBonds():

        type = bond.GetBondTypeAsDouble()

        atom1, atom2 = bond.GetBeginAtom(), bond.GetEndAtom()

        atom1_degree = sum(atom_bond.GetBondTypeAsDouble() for atom_bond in atom1.GetBonds())
        atom2_degree = sum(atom_bond.GetBondTypeAsDouble() for atom_bond in atom2.GetBonds())

        bond_list[type].append(bond.GetIdx())

        # First round of filtering
        # If the atoms neighboring the bond are already saturated, the single bond is not eligible.
        if atom1_degree < 4 and atom2_degree < 4 and type == 1:
            eligible_single_bond_list.append(bond.GetIdx())

            if atom1_degree < 3 and atom2_degree < 3:
                eligible_single_bond_list_triple.append(bond.GetIdx())

    # Second round of filtering
    # Preferably (and this is what we're doing), there are no neighboring double bonds.
    try:
        for double_bond in bond_list[2]:
            if (double_bond + 1) in bond_list[1]:
                eligible_single_bond_list.remove(double_bond + 1)
                eligible_single_bond_list_triple.remove(double_bond + 1)
            if (double_bond - 1) in bond_list[1]:
                eligible_single_bond_list.remove(double_bond - 1)
                eligible_single_bond_list_triple.remove(double_bond - 1)
    except Exception as e:
        print(f"Error: {e}")
        return mol

    if eligible_single_bond_list == []:
        return mol
    
    num_choices = random.randint(1, min(1, len(eligible_single_bond_list) // 4 + 1))
    selected_single_bond_list = limited_choices(eligible_single_bond_list, k=num_choices, max_per_item=1)

    for bond_index in selected_single_bond_list:
        bonded_alkane = generate_unsaturated(bonded_alkane, pos_double=[bond_index]) # again, our functions are 1-based indices
        if (bond_index + 1) in eligible_single_bond_list_triple:
            eligible_single_bond_list_triple.remove(bond_index+1)
        if (bond_index - 1) in eligible_single_bond_list_triple:
            eligible_single_bond_list_triple.remove(bond_index-1)

    if eligible_single_bond_list_triple != [] and random.random() < 0.1:
        selected_single_bond_triple = random.choices(eligible_single_bond_list_triple, k=1)[0]
        bonded_alkane = generate_unsaturated(bonded_alkane, pos_triple=[selected_single_bond_triple])

    return bonded_alkane


# Example
test_alkane = Chem.MolFromSmiles('CCCCCC')
test_unsaturated = rng_unsaturation(test_alkane)

Draw.MolsToGridImage(
    [test_alkane, test_unsaturated],
    subImgSize = (300, 300),
    molsPerRow = 4
)

---ADDBOND-- generated molecule C=CCCCC with double bonds are added at [4]; triple bonds are added at []

# Heteroatoms

def rng_hetero(mol: Chem.Mol) -> Chem.Mol:
    """
    Randomizes heteroatom substitution in hydrocarbons by introducing halides, 
    hydroxyl, thiol, or amine groups. Wraps the `generate_hetero()` function.

    The function selects carbons of varying substitution degrees (primary, 
    secondary, tertiary) as candidate sites. For each chosen carbon, one 
    heteroatom substituent is added with a probability distribution:

    - 25% chance: bromine (-Br)
    - 25% chance: chlorine (-Cl)
    - 48% chance: hydroxyl (-OH)
    - 1% chance: thiol (-SH)
    - 1% chance: amine (-NH₂)

    Parameters
    ----------
    mol : Chem.Mol
        Input RDKit molecule to be modified.

    Returns
    -------
    hetero : Chem.Mol
        Modified RDKit molecule with randomly introduced heteroatom substituents.

    Notes
    -----
    - Only carbon atoms without heteroatom neighbors are considered eligible.
    - Each eligible carbon can be substituted at most once per call.
    - Substitution type is chosen randomly per atom, but only one heteroatom
      is added to each selected site.
    """

    hetero = Chem.RWMol(mol)
    
    carbon_degree = func_carbon_degree(hetero)
    carbon_list = carbon_degree[1] + carbon_degree[2] + carbon_degree[3]
        
    selected_carbons = limited_choices(carbon_list, k=1, max_per_item=1)

    # Better version of randomization compared to if statements
    for atom in selected_carbons:
        substituents = [
            (0.25, {"pos_Br": [atom]}),
            (0.50, {"pos_Cl": [atom]}),
            (0.98, {"pos_OH": [atom]}),
            (0.99, {"pos_SH": [atom]}),
            (1.00, {"pos_NH2": [atom]}),
        ]

        num_random = random.random()
        for cutoff, kwargs in substituents:
            if num_random < cutoff:
                hetero = generate_hetero(hetero, **kwargs)
                break

    return hetero


# Example
test_alkane = Chem.MolFromSmiles("CCCCCCC")
test_hetero = rng_hetero(test_alkane)

Draw.MolsToGridImage(
    [test_alkane, test_hetero],
    subImgSize = (300, 300),
    molsPerRow = 4
)

---HETERO--- Generated molecule: CCCCC(O)CC
  Cl added at position(s): []
  Br added at position(s): []
  OH added at position(s): [4]
  NH2 added at position(s): []
  SH added at position(s): []

# Ether and its nitrogen and sulfur analogues

def rng_ethers(mol: Chem.Mol) -> Chem.Mol:
    """
    Randomizes ether, sulfide, or secondary amine substitution on secondary carbons.
    Wraps the `generate_ether()` function.

    The function identifies secondary carbons (degree = 2) in the molecule and 
    randomly substitutes one with an oxygen (-O-), nitrogen (-NH-), or sulfur (-S-) 
    atom to form ether, secondary amine, or sulfide groups, respectively.

    Probabilities for substitution are:
    - 98% chance: oxygen (ether)
    - 1% chance: nitrogen (secondary amine)
    - 1% chance: sulfur (sulfide)

    Parameters
    ----------
    mol : Chem.Mol
        Input RDKit molecule to be modified.

    Returns
    -------
    ether : Chem.Mol
        Modified RDKit molecule with a randomly introduced ether, amine, or sulfide group.

    Notes
    -----
    - Only secondary carbons are eligible for substitution.
    - At most one substitution is made per function call.
    - If no secondary carbons are present, the original molecule is returned unchanged.
    """

    
    ether = Chem.RWMol(mol)
    carbon_degree = func_carbon_degree(ether)

    if carbon_degree[2] == []:
        return mol

    # We only need the secondary carbons
    selected_carbon = random.choices(carbon_degree[2], k=1)[0]

    substituents = [
        (0.98, {"pos_O": [selected_carbon]}),
        (0.99, {"pos_N": [selected_carbon]}),
        (1.00, {"pos_S": [selected_carbon]})
    ]

    num_random = random.random()

    for cutoff, kwargs in substituents:
        if num_random < cutoff:
            ether = generate_ether(ether, **kwargs)
            break
    
    return ether


# Example
test_alkane = Chem.MolFromSmiles('CCCCCCC')
test_ether = rng_ethers(test_alkane)

Draw.MolsToGridImage(
    [test_alkane, test_ether],
    subImgSize = (300, 300),
    molsPerRow = 4
)

---ETHER---- Generated molecule: CCCCCOC
  O added at position(s): [1]
  N added at position(s): []
  S added at position(s): []

# Ketones and analogues

def rng_ketones(mol: Chem.Mol) -> Chem.Mol:
    """
    Randomizes the introduction of aldehyde, ketone, imine, or thione groups.
    Wraps the `generate_ketone()` function.

    The function identifies primary and secondary carbons that are not already 
    doubly bonded and randomly substitutes one with oxygen, nitrogen, or sulfur 
    to form a carbonyl- or imine-like functional group.

    Probabilities for substitution are:
    - 98% chance: oxygen (C=O, aldehyde/ketone)
    - 1% chance: nitrogen (C=NH, imine)
    - 1% chance: sulfur (C=S, thione)

    Parameters
    ----------
    mol : Chem.Mol
        Input RDKit molecule to be modified.

    Returns
    -------
    ketone : Chem.Mol
        Modified RDKit molecule containing a randomly introduced 
        aldehyde, ketone, imine, or thione group.

    Notes
    -----
    - Eligible carbons include:
        * Primary carbons (degree = 1).
        * Secondary carbons (degree = 2), provided they are not already doubly bonded.
    - At most one functional group is introduced per function call.
    - If no eligible carbons are available, the input molecule is returned unchanged.
    """
    
    ketone = Chem.RWMol(mol)
    carbon_degree = func_carbon_degree(ketone)

    eligible_carbons = []

    # This ensures that the carbon is secondary rather than doubly bonded
    for secondary_carbon in carbon_degree[2]:
        if ketone.GetAtomWithIdx(secondary_carbon).GetDegree() == 2:
            eligible_carbons.append(secondary_carbon)
    
    eligible_carbons = eligible_carbons + carbon_degree[1]

    if eligible_carbons == []:
        return mol
    
    selected_carbon = random.choices(eligible_carbons, k=1)[0]
    
    substituents = [
        (0.98, {"pos_O": [selected_carbon]}),
        (0.99, {"pos_N": [selected_carbon]}),
        (1.00, {"pos_S": [selected_carbon]})
    ]

    num_random = random.random()

    for cutoff, kwargs in substituents:
        if num_random < cutoff:
            ketone = generate_ketone(ketone, **kwargs)
            break
    
    return ketone


# Example
test_alkane = Chem.MolFromSmiles('CCCCCCC')
test_ketone = rng_ketones(test_alkane)

Draw.MolsToGridImage(
    [test_alkane, test_ketone],
    subImgSize = (300, 300),
    molsPerRow = 4
)

---KETONE--- Generated molecule: CCCCC(=O)CC
  O added at position(s): [4]
  N added at position(s): []
  S added at position(s): []

# Carboxys and the analogues

def rng_carboxys(mol: Chem.Mol) -> Chem.Mol:
    """
    Randomizes carboxylic acid, ester, amide, or acyl halide generation.

    This function probabilistically generates carboxyl-containing functional groups 
    by calling `generate_carboxy()`. It first identifies eligible carbons in the molecule 
    (primary or secondary carbons, with neighbor checks), then applies weighted randomization 
    to choose between functional group outcomes. If no valid carbons exist or the molecule 
    is too small, it returns the original molecule.

    Probabilities:
        - 34% chance: carboxylic acid or amide (secondary carbon + primary neighbor)
            - 75% → carboxylic acid
            - 25% → amide
        - 33% chance: carboxylic acid or amide (secondary carbon + secondary neighbor)
            - 75% → ester
            - 25% → amide
        - 33% chance: acyl halide (primary carbon)

    Parameters
    ----------
    mol : Chem.Mol
        The input RDKit molecule to be modified.

    Returns
    -------
    Chem.Mol
        A new RDKit molecule with a carboxylic acid, ester, amide, or acyl halide 
        added. If no modification is possible, returns the input molecule unchanged.

    Notes
    -----
    - Directionality is preserved by encoding carbon indices with signed integers 
      (positive vs. negative) when passed into `generate_carboxy()`.
    - The function includes a self-check: if no eligible carbons are found or the 
      molecule is too small, the function exits early without modification.
    """

    def pick_carboxy_or_amide(mol, eligible):
        """Helper to randomly select carboxylic acid (75%) or amide (25%)."""
        selected = random.choice(eligible)
        return (
            generate_carboxy(mol, pos_carboxy=[selected])
            if random.random() < 0.75
            else generate_carboxy(mol, pos_amide=[selected])
        )

    # Self-check mechanism
    carboxy = Chem.RWMol(mol)
    atom_count = carboxy.GetNumAtoms()
    carbon_degree = func_carbon_degree(carboxy)

    eligible_carbons = []
    eligible_carbons_ester = []

    for secondary_carbon in carbon_degree[2]:
        neighbor = carboxy.GetAtomWithIdx(secondary_carbon).GetNeighbors()
        for n in neighbor:
            if n.GetIdx() in carbon_degree[1]:
                sign = 1 if (secondary_carbon - n.GetIdx() < 0) else -1
                eligible_carbons.append(secondary_carbon * sign)
            elif n.GetIdx() in carbon_degree[2]:
                sign = 1 if (secondary_carbon - n.GetIdx() < 0) else -1
                eligible_carbons_ester.append(secondary_carbon * sign)

    if (not eligible_carbons and not eligible_carbons_ester) or atom_count < 3:
        return mol

    type_random = random.random()

    if type_random < 0.34 and eligible_carbons:
        carboxy = pick_carboxy_or_amide(carboxy, eligible_carbons)
    elif type_random < 0.67 and eligible_carbons_ester:
        carboxy = pick_carboxy_or_amide(carboxy, eligible_carbons_ester)
    elif carbon_degree[1]:
        selected = random.choice(carbon_degree[1])
        carboxy = generate_carboxy(carboxy, pos_halide=[selected])

    return carboxy


# Example
test_alkane = Chem.MolFromSmiles('CCCCCCC')
test_carboxy = rng_carboxys(test_alkane)

Draw.MolsToGridImage(
    [test_alkane, test_carboxy],
    subImgSize = (300, 300),
    molsPerRow = 4
)

---KETONE--- Generated molecule: CCCCCC(C)=O
  O added at position(s): [1]
  N added at position(s): []
  S added at position(s): []
---ETHER---- Generated molecule: CCCCCC(=O)O
  O added at position(s): [0]
  N added at position(s): []
  S added at position(s): []
---CARBOXY-- Compound function, see upper lines for details

def rng_anhydrides(mol: Chem.Mol) -> Chem.Mol:
    """
    Randomizes anhydride generation.

    This function probabilistically introduces an anhydride group into the input 
    molecule by calling `generate_anhydride()`. It identifies eligible secondary 
    carbons based on bonding patterns and atom type, and attempts to modify the 
    molecule accordingly. If no suitable carbons are found, the molecule is too 
    small, or an error occurs during generation, the input molecule is returned 
    unchanged.

    Parameters
    ----------
    mol : Chem.Mol
        The input RDKit molecule to be modified.

    Returns
    -------
    Chem.Mol
        A new RDKit molecule with an anhydride group added. If no modification 
        is possible or an error occurs, returns the input molecule unchanged.

    Notes
    -----
    - Eligibility requires secondary carbons whose neighbors are all carbons 
      (atomic number 6) and whose bonds sum to a bond order of 2 each.
    - Includes a self-check: if fewer than 5 atoms are present or no eligible 
      carbons exist, no modification is attempted.
    - A `try-except` block is used to handle bugs or edge cases in 
      `generate_anhydride()`, defaulting back to returning the unmodified molecule.
    """

    anhydride = Chem.RWMol(mol)
    atom_count = anhydride.GetNumAtoms()
    carbon_degree = func_carbon_degree(anhydride)

    eligible_carbons = []

    for secondary_carbon in carbon_degree[2]:
        neighbors = anhydride.GetAtomWithIdx(secondary_carbon).GetNeighbors()
        if all(sum(bond.GetBondTypeAsDouble() for bond in n.GetBonds()) == 2 for n in neighbors) and all(n.GetAtomicNum() == 6 for n in neighbors):
            eligible_carbons.append(secondary_carbon)

    if eligible_carbons == [] or atom_count < 5:
        return mol
    
    selected_carbon = random.choices(eligible_carbons, k=1)[0]
    
    # A lot of bugs somehow, so used try-except to redirect the bugs to rng_halides()
    try:
        anhydride = generate_anhydride(anhydride, pos_O=[selected_carbon]) # 1-based index
    except Exception as e:
        print(f"Error: {e}")
        return mol

    return anhydride


# Example
test_alkane = Chem.MolFromSmiles('CCCCCCC')
test_anhydride = rng_anhydrides(test_alkane)

Draw.MolsToGridImage(
    [test_alkane, test_anhydride],
    subImgSize = (300, 300),
    molsPerRow = 4
)

---ETHER---- Generated molecule: CCCCOCC
  O added at position(s): [2]
  N added at position(s): []
  S added at position(s): []
---KETONE--- Generated molecule: CCCC(=O)OC(C)=O
  O added at position(s): [3, 1]
  N added at position(s): []
  S added at position(s): []
---ANHYDR--- Compound function, see upper lines for details

# Testing

# Change the SMILES of below to test
test_smiles = 'CCCCC'

# Keep these code intact
test_mol = Chem.MolFromSmiles(test_smiles)

test_alkane_branched = rng_branching(test_mol)
test_alkane_cyclized = rng_cyclization(test_mol)
test_unsaturated = rng_unsaturation(test_mol)
test_hetero = rng_hetero(test_mol)
test_ether = rng_ethers(test_mol)
test_ketone = rng_ketones(test_mol)
test_carboxy = rng_carboxys(test_mol)
test_anhydride = rng_anhydrides(test_mol)

Draw.MolsToGridImage(
    [test_mol, test_alkane_branched, test_alkane_cyclized, test_unsaturated, 
     test_hetero, test_ether, test_ketone, test_carboxy, test_anhydride],
    subImgSize = (300, 300),
    molsPerRow = 5
)

---BRANCH--- generated molecule CCCC(C)CC at positions [1]
---CYCLE---- generated molecule C1CCCC1 starting and closing at [0] and [4]
---ADDBOND-- generated molecule CC=CCC with double bonds are added at [1]; triple bonds are added at []
---HETERO--- Generated molecule: CCC(Br)CC
  Cl added at position(s): []
  Br added at position(s): [2]
  OH added at position(s): []
  NH2 added at position(s): []
  SH added at position(s): []
---ETHER---- Generated molecule: CCCOC
  O added at position(s): [3]
  N added at position(s): []
  S added at position(s): []
---KETONE--- Generated molecule: CCCCC=O
  O added at position(s): [0]
  N added at position(s): []
  S added at position(s): []
---KETONE--- Generated molecule: CCCCC=O
  O added at position(s): [4]
  N added at position(s): []
  S added at position(s): []
---HETERO--- Generated molecule: CCCCC(=O)Br
  Cl added at position(s): []
  Br added at position(s): [4]
  OH added at position(s): []
  NH2 added at position(s): []
  SH added at position(s): []
---CARBOXY-- Compound function, see upper lines for details
---ETHER---- Generated molecule: CCOCC
  O added at position(s): [2]
  N added at position(s): []
  S added at position(s): []
---KETONE--- Generated molecule: CC(=O)OC(C)=O
  O added at position(s): [3, 1]
  N added at position(s): []
  S added at position(s): []
---ANHYDR--- Compound function, see upper lines for details

# XP cutoff generation function
def xp_cutoffs(
        levels: int = 11, 
        max_exp: int = 10000, 
        k: float = 2.0
    ) -> list:
    """
    Generate cumulative XP thresholds for a level progression system.

    The progression curve is defined by a power function:
    ``threshold = max_exp * (i / (levels - 1)) ** k``,
    where ``i`` is the level index (0-based). A higher exponent ``k``
    produces steeper, back-loaded progression, while lower values yield
    gentler, front-loaded curves.

    Parameters
    ----------
    levels : int, optional
        Total number of levels, including the starting level (default is 11).
    max_exp : int, optional
        Maximum experience required to reach the final level (default is 10000).
    k : float, optional
        Exponent controlling the non-linearity of progression.
        - ``k = 1.0`` → linear
        - ``k > 1.0`` → back-loaded (slower early, grindier later)
        - ``0 < k < 1.0`` → front-loaded (faster early, slower later)

    Returns
    -------
    list of int
        A list of cumulative XP thresholds, where index ``i`` corresponds
        to the XP required to reach level ``i+1``.

    Examples
    --------
    >>> xp_cutoffs(levels=5, max_exp=1000, k=2.0)
    [0, 62, 250, 562, 1000]

    >>> xp_cutoffs(levels=4, max_exp=500, k=1.5)
    [0, 79, 280, 500]
    """
    # returns list of cumulative XP thresholds for levels 1..levels
    cutoffs = []
    for i in range(levels):
        frac = i / (levels - 1)   # 0 for level 1, 1 for final level
        cutoffs.append((i, int(round(max_exp * (frac ** k)))))
    return cutoffs

# Examples
print(xp_cutoffs(levels=31, max_exp=30000, k=2.0))   # quadratic
print(xp_cutoffs(levels=31, max_exp=30000, k=1.5))   # gentler
print(xp_cutoffs(levels=31, max_exp=30000, k=3.0))   # steep

[(0, 0), (1, 33), (2, 133), (3, 300), (4, 533), (5, 833), (6, 1200), (7, 1633), (8, 2133), (9, 2700), (10, 3333), (11, 4033), (12, 4800), (13, 5633), (14, 6533), (15, 7500), (16, 8533), (17, 9633), (18, 10800), (19, 12033), (20, 13333), (21, 14700), (22, 16133), (23, 17633), (24, 19200), (25, 20833), (26, 22533), (27, 24300), (28, 26133), (29, 28033), (30, 30000)]
[(0, 0), (1, 183), (2, 516), (3, 949), (4, 1461), (5, 2041), (6, 2683), (7, 3381), (8, 4131), (9, 4930), (10, 5774), (11, 6661), (12, 7589), (13, 8558), (14, 9564), (15, 10607), (16, 11685), (17, 12797), (18, 13943), (19, 15121), (20, 16330), (21, 17570), (22, 18840), (23, 20139), (24, 21466), (25, 22822), (26, 24205), (27, 25614), (28, 27051), (29, 28513), (30, 30000)]
[(0, 0), (1, 1), (2, 9), (3, 30), (4, 71), (5, 139), (6, 240), (7, 381), (8, 569), (9, 810), (10, 1111), (11, 1479), (12, 1920), (13, 2441), (14, 3049), (15, 3750), (16, 4551), (17, 5459), (18, 6480), (19, 7621), (20, 8889), (21, 10290), (22, 11831), (23, 13519), (24, 15360), (25, 17361), (26, 19529), (27, 21870), (28, 24391), (29, 27099), (30, 30000)]

# Calculate level from the amount of XP and difficulty
def level_from_exp(exp: int, diff: int = 1) -> int:
    """Convert EXP into a level using xp_cutoffs."""

    # Change this variable to experiment with different level cutoffs
    exp_list = xp_cutoffs(levels=31, max_exp=30000, k=1.5)

    for lvl, cutoff in exp_list:
        if diff * exp <= cutoff:
            return lvl
        
    return len(exp_list)


def complexity_budget(level: int) -> int:
    """Assign a complexity budget that scales with level."""
    # Example: quadratic scaling, tunable
    return max(1, int((level ** 1.5) / 5))

for i in range(31):
    print(i, (1 + complexity_budget(i) // 3, complexity_budget(i)))

0 (1, 1)
1 (1, 1)
2 (1, 1)
3 (1, 1)
4 (1, 1)
5 (1, 2)
6 (1, 2)
7 (2, 3)
8 (2, 4)
9 (2, 5)
10 (3, 6)
11 (3, 7)
12 (3, 8)
13 (4, 9)
14 (4, 10)
15 (4, 11)
16 (5, 12)
17 (5, 14)
18 (6, 15)
19 (6, 16)
20 (6, 17)
21 (7, 19)
22 (7, 20)
23 (8, 22)
24 (8, 23)
25 (9, 25)
26 (9, 26)
27 (10, 28)
28 (10, 29)
29 (11, 31)
30 (11, 32)

# The rng function list

rng_function_list = [
    rng_branching,
    rng_cyclization,
    rng_unsaturation,
    rng_hetero,
    rng_ethers,
    rng_ketones,
    rng_carboxys,
    rng_anhydrides
]

# MDA for rng functions

def mda_randomization(diff: int, exp: int, region: int):
    """
    Master Detection Apparatus (MDA) randomizes molecule generation and 
    scales structural complexity based on difficulty, player experience, 
    and region-specific biases.

    Parameters
    ----------
    diff : int
        Difficulty modifier (1-3). Higher values increase scaling 
        of molecular complexity relative to player experience.
    exp : int
        Current player experience points (typical range: 0-10000). 
        Used to derive player level and complexity budget.
    region : int
        Region identifier (0-5). Controls which functional group families 
        are preferentially applied during molecule generation.  
            Region 0 - no preference  
            Region 1 - prefers branching, cyclization, and unsaturation  
            Region 2 - prefers halides, alcohols, amines, and thiols  
            Region 3 - prefers ethers and analogues  
            Region 4 - prefers aldehydes, ketones, and analogues  
            Region 5 - prefers carboxys, derivatives, and anhydrides

    Returns
    -------
    mol : Chem.Mol
        A generated RDKit molecule whose complexity is scaled by 
        player level, difficulty setting, and regional modifiers.

    Notes
    -----
    - Invalid input values default to `(diff=1, exp=0, region=1)`.
    - Base molecules are straight-chain alkanes, later modified by `rng_*` functions.
    - Region-specific functions are prioritized before random modifications.
    - A budget system determines how many transformations are applied.
    """

    if diff < 1 or diff > 3 or exp < 0 or region < 0 or region > 5:
        diff, exp, region = 1, 0, 1

    # Derive level and budget
    level = level_from_exp(exp, diff)   # Change `level_from_exp()` function
    max_budget = complexity_budget(level)
    budget = random.randint(1 + max_budget // 3, max_budget)

    print("")
    print(f"--- Master Detection Apparatus: Level {level} | Budget {budget} ---")

    # Base skeleton
    mol = generate_alkane_straight(random.randint(2 + level // 4, max(6, level // 2)))

    # Region-specific functions
    region_list = {
        1: [rng_branching, rng_cyclization, rng_unsaturation],
        2: [rng_hetero],
        3: [rng_ethers],
        4: [rng_ketones],
        5: [rng_carboxys, rng_anhydrides],
    }

    # Apply a region function if valid
    if region in region_list:
        mol = random.choice(region_list[region])(mol)

    # Spend budget on random modifications
    while budget > 0:
        function = random.choice(rng_function_list)
        mol = function(mol)
        budget -= 1

    return mol


# Example
mda_randomization(1, 10000, 1)

--- Master Detection Apparatus: Level 15 | Budget 9 ---
------------ generated molecule CCCCC with 5 carbons
---BRANCH--- generated molecule CCCC(C)C at positions [1]
---BRANCH--- generated molecule CCCC(C)(C)C at positions [1]
---KETONE--- Generated molecule: CCCC(C)(C)C=O
  O added at position(s): [5]
  N added at position(s): []
  S added at position(s): []
---CYCLE---- generated molecule CC1(C=O)CCCC1 starting and closing at [4] and [6]
---KETONE--- Generated molecule: CC1(C=O)CCCC1=O
  O added at position(s): [6]
  N added at position(s): []
  S added at position(s): []
Error: Python argument types in
    rdkit.Chem.rdmolops.SanitizeMol(str)
did not match C++ signature:
    SanitizeMol(class RDKit::ROMol {lvalue} mol, unsigned __int64 sanitizeOps=rdkit.Chem.rdmolops.SanitizeFlags.SANITIZE_ALL, bool catchErrors=False)
---ETHER---- Generated molecule: CC1(C=O)CCOC1
  O added at position(s): [4]
  N added at position(s): []
  S added at position(s): []
Error: Explicit valence for atom # 5 C, 5, is greater than permitted
---ANHYDR--- Compound function, see upper lines for details
---HETERO--- Generated molecule: CC1(C=O)COCC1O
  Cl added at position(s): []
  Br added at position(s): []
  OH added at position(s): [2]
  NH2 added at position(s): []
  SH added at position(s): []
Error: list.remove(x): x not in list

[16:08:45] Explicit valence for atom # 5 O, 3, is greater than permitted
[16:08:45] Explicit valence for atom # 5 C, 5, is greater than permitted

# Testing generation
test_list = []
for _ in range(20):
    test_list.append(mda_randomization(1, 5000, 1))

Draw.MolsToGridImage(
    test_list,
    subImgSize=(300, 300),
    molsPerRow=5
)

--- Master Detection Apparatus: Level 10 | Budget 3 ---
------------ generated molecule CCCCC with 5 carbons
---CYCLE---- generated molecule C1CCCC1 starting and closing at [0] and [4]
---KETONE--- Generated molecule: O=C1CCCC1
  O added at position(s): [4]
  N added at position(s): []
  S added at position(s): []
---ETHER---- Generated molecule: O=C1CCCO1
  O added at position(s): [3]
  N added at position(s): []
  S added at position(s): []
---CARBOXY-- Compound function, see upper lines for details
Error: list.remove(x): x not in list

--- Master Detection Apparatus: Level 10 | Budget 5 ---
------------ generated molecule CCCCC with 5 carbons
---CYCLE---- generated molecule C1CCCC1 starting and closing at [0] and [4]
---BRANCH--- generated molecule CC1CCCC1 at positions [2]
---BRANCH--- generated molecule CC1CCCC1C at positions [3]
---KETONE--- Generated molecule: CC1CCC(=O)C1C
  O added at position(s): [4]
  N added at position(s): []
  S added at position(s): []
Error: Explicit valence for atom # 7 O, 3, is greater than permitted
---BRANCH--- generated molecule CCC1CC(=O)C(C)C1C at positions [1]
---BRANCH--- generated molecule CCC1CC(=O)C(C)C1(C)C at positions [2]
---ETHER---- Generated molecule: CCC1OC(=O)C(C)C1(C)C
  O added at position(s): [0]
  N added at position(s): []
  S added at position(s): []

--- Master Detection Apparatus: Level 10 | Budget 3 ---
------------ generated molecule CCCCC with 5 carbons
---CYCLE---- generated molecule C1CCCC1 starting and closing at [0] and [4]
---KETONE--- Generated molecule: O=C1CCCC1
  O added at position(s): [4]
  N added at position(s): []
  S added at position(s): []
---ETHER---- Generated molecule: O=C1CCCO1
  O added at position(s): [3]
  N added at position(s): []
  S added at position(s): []
---CARBOXY-- Compound function, see upper lines for details
Error: Explicit valence for atom # 5 O, 3, is greater than permitted
---HETERO--- Generated molecule: O=C1OCCC1Br
  Cl added at position(s): []
  Br added at position(s): [0]
  OH added at position(s): []
  NH2 added at position(s): []
  SH added at position(s): []

--- Master Detection Apparatus: Level 10 | Budget 5 ---
------------ generated molecule CCCCCC with 6 carbons
---CYCLE---- generated molecule C1CCCCC1 starting and closing at [0] and [5]
---ETHER---- Generated molecule: C1CCOCC1
  O added at position(s): [4]
  N added at position(s): []
  S added at position(s): []
---KETONE--- Generated molecule: O=C1CCCC(=O)O1
  O added at position(s): [5, 3]
  N added at position(s): []
  S added at position(s): []
---ANHYDR--- Compound function, see upper lines for details
---ETHER---- Generated molecule: O=C1COCC(=O)O1
  O added at position(s): [1]
  N added at position(s): []
  S added at position(s): []
---KETONE--- Generated molecule: O=C1OC(=O)C(=O)OC1=O
  O added at position(s): [2, 0]
  N added at position(s): []
  S added at position(s): []
---ANHYDR--- Compound function, see upper lines for details

--- Master Detection Apparatus: Level 10 | Budget 3 ---
------------ generated molecule CCCCCC with 6 carbons
---ADDBOND-- generated molecule C=CCCCC with double bonds are added at [4]; triple bonds are added at []
Error: list.remove(x): x not in list
---BRANCH--- generated molecule C=CCCC(C)CC at positions [1]
---BRANCH--- generated molecule C=CCC(C)C(C)CC at positions [2]
---BRANCH--- generated molecule C=CCC(C)(CC)C(C)CC at positions [2]
---BRANCH--- generated molecule C=CC(C)C(C)(CC)C(C)CC at positions [3]
---BRANCH--- generated molecule C=C(C)C(C)C(C)(CC)C(C)CC at positions [4]
---CYCLE---- generated molecule CCC(C)C1(C)CCC=C(C)C1C starting and closing at [5] and [10]

--- Master Detection Apparatus: Level 10 | Budget 4 ---
------------ generated molecule CCCC with 4 carbons
---ADDBOND-- generated molecule CC=CC with double bonds are added at [1]; triple bonds are added at []
---BRANCH--- generated molecule CC=C(C)C at positions [2]
---HETERO--- Generated molecule: CC(C)=C(C)O
  Cl added at position(s): []
  Br added at position(s): []
  OH added at position(s): [1]
  NH2 added at position(s): []
  SH added at position(s): []
Error: list.remove(x): x not in list

--- Master Detection Apparatus: Level 10 | Budget 5 ---
------------ generated molecule CCCCCC with 6 carbons
---ADDBOND-- generated molecule CC=CCCC with double bonds are added at [3]; triple bonds are added at []
---HETERO--- Generated molecule: CC=CCC(C)O
  Cl added at position(s): []
  Br added at position(s): []
  OH added at position(s): [1]
  NH2 added at position(s): []
  SH added at position(s): []
---ETHER---- Generated molecule: CC=CNC(C)O
  O added at position(s): []
  N added at position(s): [2]
  S added at position(s): []
---KETONE--- Generated molecule: CC(O)NC=CC=O
  O added at position(s): [5]
  N added at position(s): []
  S added at position(s): []
---BRANCH--- generated molecule CCC(C=O)=CNC(C)O at positions [4]
Error: Pre-condition Violation
	bond already exists
	Violation occurred on line 500 in file Code\GraphMol\RWMol.cpp
	Failed Expression: !(boost::edge(atomIdx1, atomIdx2, d_graph).second)
	RDKIT: 2024.09.6
	BOOST: 1_85


--- Master Detection Apparatus: Level 10 | Budget 3 ---
------------ generated molecule CCCCCC with 6 carbons
---ADDBOND-- generated molecule CC=CCCC with double bonds are added at [3]; triple bonds are added at []
---CYCLE---- generated molecule CCC1C=CC1 starting and closing at [2] and [5]
---HETERO--- Generated molecule: CCC1(Cl)C=CC1
  Cl added at position(s): [2]
  Br added at position(s): []
  OH added at position(s): []
  NH2 added at position(s): []
  SH added at position(s): []
---HETERO--- Generated molecule: OCCC1(Cl)C=CC1
  Cl added at position(s): []
  Br added at position(s): []
  OH added at position(s): [0]
  NH2 added at position(s): []
  SH added at position(s): []

--- Master Detection Apparatus: Level 10 | Budget 3 ---
------------ generated molecule CCCCCC with 6 carbons
---CYCLE---- generated molecule CC1CCCC1 starting and closing at [1] and [5]
---ETHER---- Generated molecule: CC1CCOC1
  O added at position(s): [4]
  N added at position(s): []
  S added at position(s): []
---BRANCH--- generated molecule CC1COCC1C at positions [2]
---BRANCH--- generated molecule CC1COCC1(C)C at positions [1]

--- Master Detection Apparatus: Level 10 | Budget 3 ---
------------ generated molecule CCCCC with 5 carbons
---CYCLE---- generated molecule C1CCCC1 starting and closing at [0] and [4]
---ETHER---- Generated molecule: C1CCOC1
  O added at position(s): [0]
  N added at position(s): []
  S added at position(s): []
---KETONE--- Generated molecule: O=C1CCOC1
  O added at position(s): [3]
  N added at position(s): []
  S added at position(s): []
---ETHER---- Generated molecule: O=C1COCO1
  O added at position(s): [2]
  N added at position(s): []
  S added at position(s): []

--- Master Detection Apparatus: Level 10 | Budget 6 ---
------------ generated molecule CCCCC with 5 carbons
---CYCLE---- generated molecule C1CCCC1 starting and closing at [0] and [4]
---HETERO--- Generated molecule: OC1CCCC1
  Cl added at position(s): []
  Br added at position(s): []
  OH added at position(s): [1]
  NH2 added at position(s): []
  SH added at position(s): []
---CYCLE---- generated molecule C1CC2OC2C1 starting and closing at [0] and [5]
---KETONE--- Generated molecule: O=C1CCC2OC12
  O added at position(s): [2]
  N added at position(s): []
  S added at position(s): []
---ETHER---- Generated molecule: O=C1OCC2OC12
  O added at position(s): [3]
  N added at position(s): []
  S added at position(s): []
---CARBOXY-- Compound function, see upper lines for details

--- Master Detection Apparatus: Level 10 | Budget 5 ---
------------ generated molecule CCCC with 4 carbons
---CYCLE---- generated molecule CC1CC1 starting and closing at [0] and [2]
---ETHER---- Generated molecule: CC1CO1
  O added at position(s): [1]
  N added at position(s): []
  S added at position(s): []
---KETONE--- Generated molecule: O=CC1CO1
  O added at position(s): [3]
  N added at position(s): []
  S added at position(s): []
Error: list.remove(x): x not in list

--- Master Detection Apparatus: Level 10 | Budget 3 ---
------------ generated molecule CCCCC with 5 carbons
---ADDBOND-- generated molecule CC=CCC with double bonds are added at [1]; triple bonds are added at []
---BRANCH--- generated molecule CC=CC(C)C at positions [3]
---BRANCH--- generated molecule CCC(C)=CC(C)C at positions [1]
---BRANCH--- generated molecule CC(=CC(C)C)C(C)C at positions [6]
---BRANCH--- generated molecule CC(=CC(C)(C)C)C(C)C at positions [3]
---BRANCH--- generated molecule CC(=C(C)C(C)(C)C)C(C)C at positions [2]

--- Master Detection Apparatus: Level 10 | Budget 4 ---
------------ generated molecule CCCCCC with 6 carbons
---CYCLE---- generated molecule CC1CCCC1 starting and closing at [1] and [5]
---BRANCH--- generated molecule CC1CCC(C)C1 at positions [3]
---BRANCH--- generated molecule CC1CCC(C)(C)C1 at positions [1]
---BRANCH--- generated molecule CCC1CC(C)CC1(C)C at positions [5]
---BRANCH--- generated molecule CCC1(C)CC(C)CC1(C)C at positions [5]
---BRANCH--- generated molecule CCC1C(C)CC(C)(CC)C1(C)C at positions [2]
---BRANCH--- generated molecule CCC1C(C)(C)CC(C)(CC)C1(C)C at positions [3]
---HETERO--- Generated molecule: CCC1C(C)(C)CC(C)(CC)C1(C)CO
  Cl added at position(s): []
  Br added at position(s): []
  OH added at position(s): [7]
  NH2 added at position(s): []
  SH added at position(s): []

--- Master Detection Apparatus: Level 10 | Budget 5 ---
------------ generated molecule CCCCC with 5 carbons
---ADDBOND-- generated molecule CC=CCC with double bonds are added at [1]; triple bonds are added at []
---HETERO--- Generated molecule: CC=C(O)CC
  Cl added at position(s): []
  Br added at position(s): []
  OH added at position(s): [2]
  NH2 added at position(s): []
  SH added at position(s): []
Error: list.remove(x): x not in list
---KETONE--- Generated molecule: CC=C(O)C(C)=O
  O added at position(s): [3]
  N added at position(s): []
  S added at position(s): []
---ETHER---- Generated molecule: CC=C(O)C(N)=O
  O added at position(s): []
  N added at position(s): [4]
  S added at position(s): []
---CARBOXY-- Compound function, see upper lines for details
Error: list.remove(x): x not in list

--- Master Detection Apparatus: Level 10 | Budget 4 ---
------------ generated molecule CCCC with 4 carbons
---BRANCH--- generated molecule CCC(C)CC at positions [1]
---CYCLE---- generated molecule CCC1CCC1 starting and closing at [0] and [5]
---KETONE--- Generated molecule: CCC1CCC1=O
  O added at position(s): [4]
  N added at position(s): []
  S added at position(s): []
---ETHER---- Generated molecule: CCC1COC1=O
  O added at position(s): [5]
  N added at position(s): []
  S added at position(s): []
---CARBOXY-- Compound function, see upper lines for details
---BRANCH--- generated molecule CC(C)C1COC1=O at positions [2]
---BRANCH--- generated molecule CC(C)C1(C)COC1=O at positions [1]
---HETERO--- Generated molecule: CC(CO)C1(C)COC1=O
  Cl added at position(s): []
  Br added at position(s): []
  OH added at position(s): [3]
  NH2 added at position(s): []
  SH added at position(s): []

--- Master Detection Apparatus: Level 10 | Budget 6 ---
------------ generated molecule CCCCC with 5 carbons

[16:08:45] Explicit valence for atom # 7 O, 3, is greater than permitted
[16:08:45] Explicit valence for atom # 5 O, 3, is greater than permitted
[16:08:45] 

****
Pre-condition Violation
bond already exists
Violation occurred on line 500 in file C:\rdkit\build\temp.win-amd64-cpython-310\Release\rdkit\Code\GraphMol\RWMol.cpp
Failed Expression: !(boost::edge(atomIdx1, atomIdx2, d_graph).second)
****

[16:08:45] Explicit valence for atom # 2 O, 3, is greater than permitted
[16:08:45] Explicit valence for atom # 3 C, 5, is greater than permitted

---ADDBOND-- generated molecule C=CCCC with double bonds are added at [0]; triple bonds are added at []
---ETHER---- Generated molecule: C=COCC
  O added at position(s): [2]
  N added at position(s): []
  S added at position(s): []
---HETERO--- Generated molecule: C=COCCCl
  Cl added at position(s): [4]
  Br added at position(s): []
  OH added at position(s): []
  NH2 added at position(s): []
  SH added at position(s): []
---ETHER---- Generated molecule: O=COCCCl
  O added at position(s): [0]
  N added at position(s): []
  S added at position(s): []
Error: list.remove(x): x not in list

--- Master Detection Apparatus: Level 10 | Budget 5 ---
------------ generated molecule CCCC with 4 carbons
---CYCLE---- generated molecule CC1CC1 starting and closing at [0] and [2]
---ETHER---- Generated molecule: CC1CO1
  O added at position(s): [0]
  N added at position(s): []
  S added at position(s): []
---ADDBOND-- generated molecule C=C1CO1 with double bonds are added at [2]; triple bonds are added at []

--- Master Detection Apparatus: Level 10 | Budget 5 ---
------------ generated molecule CCCC with 4 carbons
---CYCLE---- generated molecule CC1CC1 starting and closing at [1] and [3]
---BRANCH--- generated molecule CC1CC1C at positions [3]
---BRANCH--- generated molecule CC1CC1(C)C at positions [1]
---ETHER---- Generated molecule: CC1OC1(C)C
  O added at position(s): [2]
  N added at position(s): []
  S added at position(s): []
Error: Explicit valence for atom # 2 O, 3, is greater than permitted

--- Master Detection Apparatus: Level 10 | Budget 4 ---
------------ generated molecule CCCCCC with 6 carbons
---ADDBOND-- generated molecule CCC=CCC with double bonds are added at [2]; triple bonds are added at []
---ADDBOND-- generated molecule CCC#CCC with double bonds are added at []; triple bonds are added at [2]
Error: Explicit valence for atom # 3 C, 5, is greater than permitted
---HETERO--- Generated molecule: CCC#CC(C)Cl
  Cl added at position(s): [1]
  Br added at position(s): []
  OH added at position(s): []
  NH2 added at position(s): []
  SH added at position(s): []
---HETERO--- Generated molecule: CCC#CC(Cl)CBr
  Cl added at position(s): []
  Br added at position(s): [0]
  OH added at position(s): []
  NH2 added at position(s): []
  SH added at position(s): []
Error: Explicit valence for atom # 2 C, 5, is greater than permitted

[16:08:45] Explicit valence for atom # 2 C, 5, is greater than permitted

molecular_tag = {
    "alcohol": [4],              # OH on atom index 4
    "haloalkane": [7, 12],       # Halogens on atom indices 7 and 12
    "ketone": [22]               # C=O on atom index 22
}

# Dictionary of substructures
# Since we're doing a dictionary, it's very easy to scale up

functional_groups_dict = {

    # Comments denote the tuple index where the carbon of interest lands on.
    # Alkanes
    "primary alkanes": Chem.MolFromSmarts("[CH3;CX4]-[C]"),                              #all 0
    "secondary alkanes": Chem.MolFromSmarts("[CH2;CX4](-[C])-[C]"),
    "tertiary alkanes": Chem.MolFromSmarts("[CH;CX4](-[C])(-[C])-[C]"),
    "quaternary alkanes": Chem.MolFromSmarts("[C;CX4](-[C])(-[C])(-[C])-[C]"),

    # Alkenes
    "alkenes": Chem.MolFromSmarts("C=C"),                                                #all 0-1
    "terminal alkenes": Chem.MolFromSmarts("[CH2]=[C]"),
    "secondary alkenes": Chem.MolFromSmarts("[CH](=C)-[C]"),
    "saturated alkenes": Chem.MolFromSmarts("[C](=C)(-[C])-[C]"),
    "dienes": Chem.MolFromSmarts("C=C-C=C"),                                             #0,1,2,3

    # Alkynes
    "alkynes": Chem.MolFromSmarts("C#C"),                                                #all 0-1
    "terminal alkynes": Chem.MolFromSmarts("[CH]#C"),
    "saturated alkynes": Chem.MolFromSmarts("[C](#C)-[C]"),

    # Halides
    "halides": Chem.MolFromSmarts("[C][Br,Cl]"),                                         #all 0-1
    "primary halides": Chem.MolFromSmarts("[CH2]([Br,Cl])[C]"),
    "secondary halides": Chem.MolFromSmarts("[CH]([Br,Cl])([C])[C]"),
    "tertiary halides": Chem.MolFromSmarts("[C]([Br,Cl])([C])([C])[C]"),
    "terminal alkenyl halides": Chem.MolFromSmarts("[CH]([Br,Cl])=C"),                   #all 0-1
    "saturated alkenyl halides": Chem.MolFromSmarts("[C]([Br,Cl])([C])=C"),
    "alkynyl halides": Chem.MolFromSmarts("[C]([Br,Cl])#C"),
    "vicinal dihalides": Chem.MolFromSmarts("[C]([Br,Cl])[C]([Br,Cl])"),                 #0-1,2-3
    "geminal dihalides": Chem.MolFromSmarts("[C]([Br,Cl])[Br,Cl]"),                      #0

    # Alcohols
    "alcohols": Chem.MolFromSmarts("C-[OH]"),                                            #all 0-1
    "primary alcohols": Chem.MolFromSmarts("[CH2]([OH])[C]"),
    "secondary alcohols": Chem.MolFromSmarts("[CH]([OH])([C])[C]"),
    "tertiary alcohols": Chem.MolFromSmarts("[C]([OH])([C])([C])[C]"),
    "alkenols": Chem.MolFromSmarts("[C]([OH])=C"),
    "alkynols": Chem.MolFromSmarts("[C]([OH])#C"),
    "vicinal diols": Chem.MolFromSmarts("[C]([OH])[C]([OH])"),                           #0-1,2-3
    "geminal diols": Chem.MolFromSmarts("[C]([OH])[OH]"),                                #0

    # Carbonyls
    "carbonyls": Chem.MolFromSmarts("C=O"),                                              #all 0-1
    "aldehydes": Chem.MolFromSmarts("[CH]=O"),
    "ketones": Chem.MolFromSmarts("[C](=O)([C])[C]"),
    "hemiacetals": Chem.MolFromSmarts("[C]([OH])([O](C))([C])[C]"),                      #0-1,0-2
    "acetals": Chem.MolFromSmarts("[C]([O](C))([O](C))([C])[C]"),                        #0-1,0-2
    "imines": Chem.MolFromSmarts("[C](=N)([C])[C]"),                                     #0-1
    "aldols": Chem.MolFromSmarts("[C](=O)[C][C][OH]"),                                   #0-1,3-4
    "enones": Chem.MolFromSmarts("[C](=O)[C]=[C]"),                                      #0-1,3
    "beta dicarbonyls": Chem.MolFromSmarts("[C](=O)[C][C]=[O]"),                         #0-1,2,3-4
    "delta dicarbonyls": Chem.MolFromSmarts("[C](=O)[C][C][C][C]=[O]"),                  #0-1,5-6
    "cyclohexenones": Chem.MolFromSmarts("[C1](=O)[C]=[C][C][C][C][C1]"),                #0-1,3
    "acyloins": Chem.MolFromSmarts("[C](=O)[C][OH]"),                                    #0-1,2-3

    # Carboxys
    "carboxys": Chem.MolFromSmarts("[C](=O)[OH]"),                                       #all 0-1,2
    "acyl halides": Chem.MolFromSmarts("[C](=O)[Br,Cl]"),
    "esters": Chem.MolFromSmarts("[C](=O)[O][C]"),
    "amides": Chem.MolFromSmarts("[C](=O)[NX3]"),
    "anhydrides": Chem.MolFromSmarts("[C](=O)[O][C](=O)"),                               #0-1,2,3-4
    "peroxycarboxys": Chem.MolFromSmarts("[C](=O)[O][OH]"),                              #0-1,2,3
    "vicinal halocarboxys": Chem.MolFromSmarts("[C](=O)([OH])[C][Br,Cl]"),               #0-1,2,3-4
    "beta ketoesters": Chem.MolFromSmarts("[C](=O)[C][C](=O)[O][C]"),                    #0-1,2,3-4

    # Ethers
    "ethers": Chem.MolFromSmarts("[C][O][C]"),                                           #all 0,2
    "epoxides": Chem.MolFromSmarts("[C1][O][C1]"),

    # Amines
    "amines": Chem.MolFromSmarts("[NX3][C]"),                                            #all 0
    "primary amines": Chem.MolFromSmarts("[NH2][C]"),
    "secondary amines": Chem.MolFromSmarts("[NH]([C])[C]"),
    "tertiary amines": Chem.MolFromSmarts("[N]([C])([C])[C]"),
    "quaternary amines": Chem.MolFromSmarts("[N+]([C])([C])([C])[C]"),
    "nitros": Chem.MolFromSmarts("[N+]([O-])(=[O])[C]"),                                 #0,1
    "nitriles": Chem.MolFromSmarts("C#N"),                                               #all 0
    "alkyl azides": Chem.MolFromSmarts("[C]=[N]=[N+]=[N-]"),
    "N-nitrosamines": Chem.MolFromSmarts("[C][N][N]=[O]"),

    # Nucleophiles
    "nuc halides": Chem.MolFromSmarts("[Br,Cl;H1]"),                                     #all 0
    "nuc hydroxys": Chem.MolFromSmarts("[O,S;H1,H2]"),
    "nuc amines": Chem.MolFromSmarts("[N;H1,H2,H3]"),
    "nuc carbonyls": Chem.MolFromSmarts("[C;H1,H2,H3]C=[O,N]"),
    "nuc nitriles": Chem.MolFromSmarts("[C;H1]#N"),
    "nuc hydrides": Chem.MolFromSmarts("[H-]"),
    "nuc carbanions": Chem.MolFromSmarts("[C-]"),

    # Leaving groups
    "lg halides": Chem.MolFromSmarts("[Br,Cl][C]"),                                      #all 0-1
    "lg hydroxys": Chem.MolFromSmarts("[O,S][C]"),
    "lg amines": Chem.MolFromSmarts("[NX3][C]"),
    "lg carbonyls": Chem.MolFromSmarts("[O,N]=[C]"),

    # Ylides
    "ylides": Chem.MolFromSmarts('[C]=[P](c1ccccc1)(c2ccccc2)(c3ccccc3)')                #all 0
}

print("YOUR MOTHER!!! YO MAMA YO MAMA YO MMAMAMAMAMAAA")   # VERY IMPORTANT CODE DO NOT ALTER!!! -Audrey "cogsci" Dao

YOUR MOTHER!!! YO MAMA YO MAMA YO MMAMAMAMAMAAA

# Parse molecules function

def parse_molecule(mol: Chem.Mol) -> dict:
    """
    Parse a molecule for predefined functional groups using SMARTS patterns.

    This function scans the input molecule against all patterns stored in
    `functional_groups_dict`. Each match is recorded under its functional
    group name, with atom index locations stored for later use in reaction
    handling.

    Parameters
    ----------
    mol : Chem.Mol
        RDKit molecule object to be analyzed.

    Returns
    -------
    molecule_tag : dict
        Dictionary of detected substructures with the following format:

        {
            "functional_group_1": [ (atom_idx1, atom_idx2, ...), ... ],
            "functional_group_2": [ (atom_idx3, atom_idx4, ...), ... ],
            ...
        }

        Each entry contains one or more tuples of atom indices corresponding
        to substructure matches.

    Notes
    -----
    - Functional group definitions must be provided in the global
      `functional_groups_dict` as RDKit SMARTS patterns.
    - Atom indices in the returned dictionary correspond to the RDKit
      molecule atom ordering.
    - A parse log is printed showing the SMILES of the molecule and the
      number of functional groups detected.
    """

    # define the empty dictionaries
    # define individual molecule dictionaries
    molecule_tag = {}

    for name, pattern in functional_groups_dict.items():
        matches = mol.GetSubstructMatches(pattern)
        if matches:
            molecule_tag[name] = matches
    
    print("")
    print(f"PARSE LOG: molecule {Chem.MolToSmiles(mol)} has been parsed with tag size of {len(molecule_tag)}")
    return molecule_tag


# Example
test_mol = mda_randomization(1, 2400, 0)

print(parse_molecule(test_mol))
test_mol

--- Master Detection Apparatus: Level 6 | Budget 1 ---
------------ generated molecule CCCCCC with 6 carbons
---ADDBOND-- generated molecule C=CCCCC with double bonds are added at [4]; triple bonds are added at []

PARSE LOG: molecule C=CCCCC has been parsed with tag size of 5
{'primary alkanes': ((0, 1),), 'secondary alkanes': ((1, 0, 2), (2, 1, 3), (3, 2, 4)), 'alkenes': ((4, 5),), 'terminal alkenes': ((5, 4),), 'secondary alkenes': ((4, 5, 3),)}

# General substructure filtering

def filter_substructure(mol_tags: dict, filter_list: list):
    """
    Filter substructures from a molecule tag dictionary.

    Parameters
    ----------
    mol_tags : dict
        Dictionary of substructures returned from `parse_molecules()`.  
        Keys correspond to functional group names, and values are lists
        of atom index tuples where the substructure occurs.
    filter_list : list of str
        List of functional group names (keys from `functional_groups_dict`)
        to search for and extract.

    Returns
    -------
    filtered_substructures : list of tuples
        List of atom index tuples corresponding to the filtered
        substructures present in the molecule.

    Notes
    -----
    - If a substructure from `filter_list` is not found in `mol_tags`,
      it is silently skipped.
    - Useful for defining chemical roles such as leaving groups,
      nucleophiles, or carbonyl-containing motifs.
    """

    filtered_substructures = []

    for substructure in filter_list:
        try:
            current_filter = [t for t in mol_tags[substructure]]
            filtered_substructures += current_filter
            print(f"FILTER LOG: {substructure} filtered")
        except:
            pass

    return filtered_substructures

# Nucleophile and leaving group filtering
def filter_nucleophiles(mol_tags: dict):
    """
    Extract nucleophilic substructures from a molecule.

    Parameters
    ----------
    mol_tags : dict
        Dictionary of parsed molecule tags, typically obtained from
        `parse_molecules(mol)`. Keys correspond to substructure
        names and values are lists of atom index tuples.

    Returns
    -------
    nucleophiles : list of tuple
        List of atom index tuples corresponding to nucleophilic
        substructures found in the molecule.

    Notes
    -----
    - Internally wraps `filter_substructure()` with a predefined list of
      nucleophilic groups:
      halides, hydroxys, amines, carbonyls, nitriles, hydrides,
      and carbanions.
    - This function is a convenience wrapper for quickly isolating
      nucleophilic atoms without manually specifying filters.
    """

    print("Nucleophiles are being filtered...")

    return filter_substructure(
        mol_tags, 
        [
            'nuc halides', 
            'nuc hydroxys', 
            'nuc amines', 
            'nuc carbonyls', 
            'nuc nitriles', 
            'nuc hydrides', 
            'nuc carbanions'
        ]
    )

def filter_leaving_groups(mol_tags: dict):
    """
    Extract leaving group substructures from a molecule.

    Parameters
    ----------
    mol_tags : dict
        Dictionary of parsed molecule tags, typically obtained from
        `parse_molecules(mol)`. Keys correspond to substructure
        names and values are lists of atom index tuples.

    Returns
    -------
    nucleophiles : list of tuple
        List of atom index tuples corresponding to leaving group
        substructures found in the molecule.

    Notes
    -----
    - Internally wraps `filter_substructure()` with a predefined list of
      nucleophilic groups:
      halides, hydroxys, amines, and carbonyls (in enol form).
    - This function is a convenience wrapper for quickly isolating
      leaving groups without manually specifying filters.
    """

    print("Leaving groups are being filtered...")
    
    return filter_substructure(
        mol_tags, 
        [
            'lg halides', 
            'lg hydroxys', 
            'lg amines', 
            'lg carbonyls'
        ]
    )


# Example
test_mol = mda_randomization(1, 2000, 0)
test_tags = parse_molecule(test_mol)

print(filter_nucleophiles(test_tags))
print(filter_leaving_groups(test_tags))
test_mol

--- Master Detection Apparatus: Level 5 | Budget 2 ---
------------ generated molecule CCCC with 4 carbons
---CYCLE---- generated molecule CC1CC1 starting and closing at [1] and [3]
---CYCLE---- generated molecule C1C2CC12 starting and closing at [0] and [2]

PARSE LOG: molecule C1C2CC12 has been parsed with tag size of 2
Nucleophiles are being filtered...
[]
Leaving groups are being filtered...
[]

# Compare substitution levels

def compare_subs(
        mol: Chem.Mol, 
        idx1: int, 
        idx2: int, 
        more_less: bool
    ) -> int:
    """
    Compare the substitution levels of two carbons and return one carbon index.

    Parameters
    ----------
    mol : Chem.Mol
        RDKit molecule containing the carbons of interest.
    idx1 : int
        Atom index of the first carbon.
    idx2 : int
        Atom index of the second carbon.
    more_less : bool
        If True, return the more substituted carbon.  
        If False, return the less substituted carbon.

    Returns
    -------
    int
        Atom index of the selected carbon.

    Notes
    -----
    - Substitution is defined by the degree of the carbon atom (number of attached atoms).  
    - If both carbons have the same degree, one of them is chosen randomly.
    """

    mol = Chem.RWMol(mol)
    atom1 = mol.GetAtomWithIdx(idx1).GetDegree()
    atom2 = mol.GetAtomWithIdx(idx2).GetDegree()

    if atom1 < atom2:
        return idx2 if more_less == True else idx1
    elif atom1 > atom2:
        return idx1 if more_less == True else idx2
    else:
        return random.choices([idx1, idx2], k=1)[0]


# decrease number of bonds or breaking it
def decrease_bond(
        mol: Chem.Mol, 
        start_idx: int, 
        end_idx: int
    ) -> Chem.Mol:
    """
    Decrease the bond order between two atoms in a molecule.

    Parameters
    ----------
    mol : Chem.Mol
        RDKit molecule containing the bond to be modified.
    start_idx : int
        Atom index of the first atom in the bond.
    end_idx : int
        Atom index of the second atom in the bond.

    Returns
    -------
    Chem.Mol
        A new RDKit molecule with the modified bond order.

    Notes
    -----
    - Triple bonds are converted to double bonds.  
    - Double bonds are converted to single bonds.  
    - Single bonds are removed entirely.  
    - If the operation invalidates the molecule, the original molecule is returned.  
    - Bond sanitization is attempted after modification.
    """
    new_mol = Chem.RWMol(mol)
    bond = new_mol.GetBondBetweenAtoms(start_idx, end_idx)

    # Change the bond type (decrease order)
    if bond.GetBondType() == Chem.BondType.DOUBLE:
        bond.SetBondType(Chem.BondType.SINGLE)
    elif bond.GetBondType() == Chem.BondType.TRIPLE:
        bond.SetBondType(Chem.BondType.DOUBLE)
    elif bond.GetBondType() == Chem.BondType.SINGLE:
        new_mol.RemoveBond(start_idx, end_idx)

    try:
        Chem.SanitizeMol(mol)
    except:
        print(f"Error in decreasing molecule {Chem.MolToSmiles(mol)} bond order")
        return mol
    
    print(f"Bond order is decreased at bond [{start_idx}, {end_idx}]")
    return new_mol

# Increasing the bond order or forming one
def increase_bond(
        mol: Chem.Mol, 
        start_idx: int, 
        end_idx: int
    ) -> Chem.Mol:
    """
    Increase the bond order between two atoms in a molecule.

    Parameters
    ----------
    mol : Chem.Mol
        RDKit molecule containing the bond to be modified.
    start_idx : int
        Atom index of the first atom in the bond.
    end_idx : int
        Atom index of the second atom in the bond.

    Returns
    -------
    Chem.Mol
        A new RDKit molecule with the modified bond order.

    Notes
    -----
    - Single bonds are converted to double bonds.  
    - Double bonds are converted to triple bonds.  
    - If the bond does not exist, a new single bond is added.  
    - If the operation invalidates the molecule, the original molecule is returned.  
    - Bond sanitization is attempted after modification.
    """
    new_mol = Chem.RWMol(mol)
    
    # Change the bond type (decrease order)
    try:
        bond = new_mol.GetBondBetweenAtoms(start_idx, end_idx)
        if bond.GetBondType() == Chem.BondType.DOUBLE:
            bond.SetBondType(Chem.BondType.TRIPLE)
        elif bond.GetBondType() == Chem.BondType.SINGLE:
            bond.SetBondType(Chem.BondType.DOUBLE)
    except:
        new_mol.AddBond(start_idx, end_idx, Chem.BondType.SINGLE)

    try:
        Chem.SanitizeMol(mol)
    except:
        print(f"Error in increasing molecule {Chem.MolToSmiles(mol)} bond order")
        return mol
    
    print(f"Bond order is increased at bond [{start_idx}, {end_idx}]")
    return new_mol

# Separate molecules
def separate_mols(combined_mol: Chem.Mol) -> list:
    """
    Separate a combined RDKit molecule into its individual components.

    Parameters
    ----------
    combined_mol : Chem.Mol
        A combined RDKit molecule that may contain multiple disconnected fragments.

    Returns
    -------
    list of Chem.Mol
        A list of separated molecules.  
        Molecules with fewer than 2 atoms (excluding hydrogens) are omitted.

    Notes
    -----
    - Fragments are obtained using `Chem.GetMolFrags(..., asMols=True)`.  
    - Monoatomic species (e.g., isolated hydrogens or ions) are excluded.  
    - Each valid fragment is sanitized before being returned.  
    """

    print("separating molecules...")
    frag = Chem.GetMolFrags(combined_mol, asMols=True)

    # Just in case the molecule is monoatomic (excluding hydrogens), we will get rid of it
    new_molecules = []
    for mol in frag:
        if mol.GetNumAtoms() < 2:
            print(f"Molecule {Chem.MolToSmiles(mol)} is omitted due to small size")
            continue
        new_molecules.append(mol)
    
    for mol in new_molecules:
        Chem.SanitizeMol(mol)
    
    print("separation complete...")
    return new_molecules

def rxn_substitution(
        nucleophile: Chem.Mol, 
        electrophile: Chem.Mol, 
        attack_sites: tuple[int, int, int], 
        intramolecular: bool = False
    ) -> list[Chem.Mol]:
    """
    Perform a substitution reaction between a nucleophile and an electrophile. 

    The reaction generalizes substitution to include both traditional leaving 
    groups and π-bond dissociation (treated as "leaving" to form a new σ-bond). 
    Addition reactions are excluded. For simplicity, resulting molecules are not 
    parsed with `parse_molecule()`.

    Parameters
    ----------
    nucleophile : rdkit.Chem.Mol
        RDKit molecule acting as the nucleophile.
    electrophile : rdkit.Chem.Mol
        RDKit molecule acting as the electrophile.
    attack_sites : tuple of int
        A 3-element tuple `(nuc_idx, elec_idx, break_idx)`:
        - `nuc_idx`: Atom index on the nucleophile performing the attack.
        - `elec_idx`: Atom index on the electrophile being attacked.
        - `break_idx`: Atom index on the electrophile bonded to `elec_idx` 
          that will be broken during substitution.
    intramolecular : bool, optional (default=False)
        Whether the reaction is intramolecular (`True`) or intermolecular (`False`).

    Returns
    -------
    result : list of rdkit.Chem.Mol
        List of RDKit molecules representing the products of the substitution.

    Notes
    -----
    - If bond modifications fail, the original input molecules are returned.
    - Debug messages provide detailed trace of the reaction process.
    """
    # Prepare writable molecules
    nuc, elec = Chem.RWMol(nucleophile), Chem.RWMol(electrophile)
    combined = Chem.CombineMols(nuc, elec) if not intramolecular else nuc
    offset = nuc.GetNumAtoms()

    # Normalize atom indices after combining
    nuc_idx = attack_sites[0]
    elec_idx = attack_sites[1] + offset if not intramolecular else attack_sites[1]
    break_idx = attack_sites[2] + offset if not intramolecular else attack_sites[2]

    print("\n========== SUBSTITUTION REACTION START ==========")
    print(f"Nucleophile: {Chem.MolToSmiles(nuc)} (attacking atom index: {nuc_idx})")
    print(f"Electrophile: {Chem.MolToSmiles(elec)} (bond to break: {attack_sites[1]}-{attack_sites[2]})")

    # Perform substitution
    try:
        new_combined = decrease_bond(combined, elec_idx, break_idx)
        new_combined = increase_bond(new_combined, nuc_idx, elec_idx)
    except Exception as e:
        print(f"[ERROR] Substitution failed: {e}")
        return [nuc, elec]

    print(f"[INFO] Atom {nuc_idx} successfully attacked atom {elec_idx}")
    print(f"[INFO] Intermediate product SMILES: {Chem.MolToSmiles(new_combined)}")

    try:
        result = separate_mols(new_combined)
    except Exception as e:
        print(f"[ERROR] Failed to separate molecules: {e}")
        return [nuc, elec]

    print("=========== SUBSTITUTION REACTION END ===========\n")
    return result



def check_substitution(mol_list: list[Chem.Mol]):
    """
    Check substitution eligibility and execute a substitution reaction if possible.

    This function takes in a list of 1 or 2 molecules and determines if a substitution 
    reaction can occur. Eligible nucleophilic and leaving-group sites are filtered, 
    and attack parameters are randomized. Returns the product molecules if eligible, 
    otherwise returns False.

    Parameters
    ----------
    mol_list : list of rdkit.Chem.Mol
        List containing 1 (intramolecular) or 2 (intermolecular) molecules.

    Returns
    -------
    list of rdkit.Chem.Mol or bool
        - If eligible, the resulting molecule(s) from `rxn_substitution`.
        - If not eligible, returns `False`.

    Notes
    -----
    - Nucleophilic and leaving-group sites are determined using `parse_molecule()` 
      and filtered with `filter_nucleophiles()` and `filter_leaving_groups()`.
    - For intramolecular reactions, the single molecule acts as both nucleophile 
      and electrophile.
    - For intermolecular reactions, either molecule can serve as nucleophile or 
      electrophile; eligible groups are randomly chosen.
    """
    print("\n========== SUBSTITUTION CHECK START ==========")

    # Validate molecule list size
    if len(mol_list) == 1:
        intramolecular = True
        print("[INFO] Substitution will be intramolecular")
    elif len(mol_list) == 2:
        intramolecular = False
        print("[INFO] Substitution will be intermolecular")
    else:
        print("[ERROR] Substitution requires 1 or 2 molecules; received "
              f"{len(mol_list)}")
        return False

    # Parse molecules and filter nucleophiles / leaving groups
    nuc_sites, lg_sites = {}, {}
    print("[INFO] Parsing and filtering molecule sites...")
    for i, mol in enumerate(mol_list):
        tags = parse_molecule(mol)
        nuc_sites[i] = filter_nucleophiles(tags)
        lg_sites[i] = filter_leaving_groups(tags)

    # Choose random eligible positions and attempt reaction
    try:
        if intramolecular:
            chosen_nuc = random.choice(nuc_sites[0])
            chosen_lg = random.choice(lg_sites[0])
            attack_tuple = (chosen_nuc[0], chosen_lg[1], chosen_lg[0])
            return rxn_substitution(mol_list[0], mol_list[0], attack_tuple, True)

        else:
            eligible_pairs = []

            # mol0 nucleophile, mol1 leaving group
            if nuc_sites[0] and lg_sites[1]:
                eligible_pairs.append([
                    random.choice(nuc_sites[0]),
                    random.choice(lg_sites[1]),
                    mol_list[0], mol_list[1]
                ])

            # mol1 nucleophile, mol0 leaving group
            if nuc_sites[1] and lg_sites[0]:
                eligible_pairs.append([
                    random.choice(nuc_sites[1]),
                    random.choice(lg_sites[0]),
                    mol_list[1], mol_list[0]
                ])

            chosen_group = random.choice(eligible_pairs)
            nuc_mol, lg_mol = chosen_group[2], chosen_group[3]
            attack_tuple = (chosen_group[0][0], chosen_group[1][1], chosen_group[1][0])
            return rxn_substitution(nuc_mol, lg_mol, attack_tuple)

    except Exception as e:
        print(f"[ERROR] {e}")
        return False

    finally:
        print("=========== SUBSTITUTION CHECK END ===========\n")


# Example
test_nuc = Chem.MolFromSmiles('CC(O)CCC(O)C[Br]')
test_elec = Chem.MolFromSmiles('CC[Br]')
test_substitution = check_substitution([test_nuc, test_elec])

Draw.MolsToGridImage(
    [test_nuc, test_elec] + test_substitution,
    subImgSize = (300, 300),
    molsPerRow = 4
)

========== SUBSTITUTION CHECK START ==========
[INFO] Substitution will be intermolecular
[INFO] Parsing and filtering molecule sites...

PARSE LOG: molecule CC(O)CCC(O)CBr has been parsed with tag size of 9
Nucleophiles are being filtered...
FILTER LOG: nuc hydroxys filtered
Leaving groups are being filtered...
FILTER LOG: lg halides filtered
FILTER LOG: lg hydroxys filtered

PARSE LOG: molecule CCBr has been parsed with tag size of 4
Nucleophiles are being filtered...
Leaving groups are being filtered...
FILTER LOG: lg halides filtered

========== SUBSTITUTION REACTION START ==========
Nucleophile: CC(O)CCC(O)CBr (attacking atom index: 2)
Electrophile: CCBr (bond to break: 1-2)
Bond order is decreased at bond [10, 11]
Bond order is increased at bond [2, 10]
[INFO] Atom 2 successfully attacked atom 10
[INFO] Intermediate product SMILES: Br.CCOC(C)CCC(O)CBr
separating molecules...
Molecule [Br] is omitted due to small size
separation complete...
=========== SUBSTITUTION REACTION END ===========

=========== SUBSTITUTION CHECK END ===========

# Function that fetch atom neighbors and choose a random eligible neighbor
# Function is created with elimination reactions in mind
def fetch_neighbors(mol: Chem.Mol, index: int) -> int:
    """
    Fetches a neighboring carbon atom that is not fully substituted.

    This function identifies all neighboring atoms of a specified atom index in
    the given molecule and returns one that satisfies the following criteria:
    1. The atom is a carbon.
    2. The carbon has at least one available hydrogen (i.e., not fully substituted).

    Parameters
    ----------
    mol : Chem.Mol
        The RDKit molecule to search within.
    index : int
        The index of the atom whose neighbors are being evaluated.

    Returns
    -------
    int
        The index of a randomly selected eligible neighboring carbon atom.

    Notes
    -----
    If no eligible neighboring carbon exists, the function will return False.
    """


    neighbors = mol.GetAtomWithIdx(index).GetNeighbors()

    eligible_carbons = []
    for carbon in neighbors:
        if carbon.GetAtomicNum() == 6 and carbon.GetNumImplicitHs() > 0:
            eligible_carbons.append(carbon.GetIdx())

    try:
        selected_carbon = random.choice(eligible_carbons)
    except Exception as e:
        print(f"Error: {e}")
        return False

    return selected_carbon


# Elimination generalization
def rxn_elimination(molecule: Chem.Mol, indexes: tuple) -> list:
    """
    Performs an elimination reaction on a molecule, forming a double bond
    by removing a hydrogen and ejecting a leaving group.

    This reaction is modeled as an acid-base type reaction, so no explicit base
    is required. The function outputs an array for consistency with other reaction functions.

    Parameters
    ----------
    molecule : Chem.Mol
        The RDKit molecule to undergo elimination.
    indexes : tuple of int
        Tuple (C, LG, neighbor) where:
        - C: index of the carbon bearing the leaving group
        - LG: index of the leaving group
        - neighbor: index of the adjacent carbon where the double bond forms

    Returns
    -------
    list of Chem.Mol
        A list containing the resulting molecule(s) after elimination.
        Small fragments may be separated and returned individually.

    Notes
    -----
    The output is a list for consistency with other reaction functions like substitution.
    If bond manipulation fails or the molecule cannot be separated, the original molecule
    is returned in a single-element list.

    """
    mol = Chem.RWMol(molecule)

    print("\n=========== ELIMINATION REACTION START ===========")
    print(f"Molecule: {Chem.MolToSmiles(molecule)} | Bond indexes involved: {indexes}")

    new_combined = decrease_bond(mol, indexes[0], indexes[1])
    new_combined = increase_bond(new_combined, indexes[0], indexes[2])

    try:
        result = separate_mols(new_combined)
    except Exception as e:
        print(f"[ERROR] Elimination failed: {e}")
        return [molecule]

    print(f"[RESULT] Elimination generated molecule: {Chem.MolToSmiles(new_combined)}")
    print("=========== ELIMINATION REACTION END ===========\n")
    return result


# Check function
def check_elimination(mol_array: list):
    """
    Checks if a molecule is eligible for elimination uwu~ 
    and executes the elimination reaction if possible.

    Parameters
    ----------
    mol_array : list of Chem.Mol
        A list containing exactly one molecule to check.

    Returns
    -------
    list of Chem.Mol or bool
        - If eligible, returns the result of `rxn_elimination` (list of molecules)
        - If not eligible, returns False

    Notes
    -----
    This function parses the molecule to find leaving groups, selects a random
    eligible leaving group, and identifies a neighboring carbon to form a double bond.
    The function is designed for single-molecule eliminations (intramolecular).
    """
    print("\n=========== ELIMINATION CHECK END ===========")

    # mol_array size check
    if len(mol_array) != 1:
        print("CHECK ERROR: Elimination only requires 1 molecule")
        return False
    
    # leaving group and nucleophile filtering
    print("CHECK LOG: Parsing and filtering molecules")

    parsed = parse_molecule(mol_array[0])
    filter_lg = filter_leaving_groups(parsed)

    # choosing random eligible positions
    try:
        chosen_lg = random.choice(filter_lg)
        neighbor_index = fetch_neighbors(mol_array[0], chosen_lg[1])
        return rxn_elimination(mol_array[0], (chosen_lg[1], chosen_lg[0], neighbor_index))
    except Exception as e:
        print(f"Error: {e}")
        return False
    finally:
        print("=========== ELIMINATION CHECK END ===========\n")


# Example
test_mol = Chem.MolFromSmiles('[Br]CCC(CCCCCCCC)OCC')
test_elimination = check_elimination([test_mol])

Draw.MolsToGridImage(
    [test_mol] + test_elimination,
    subImgSize = (300, 300),
    molsPerRow = 4
)

=========== ELIMINATION CHECK END ===========
CHECK LOG: Parsing and filtering molecules

PARSE LOG: molecule CCCCCCCCC(CCBr)OCC has been parsed with tag size of 7
Leaving groups are being filtered...
FILTER LOG: lg halides filtered
FILTER LOG: lg hydroxys filtered

=========== ELIMINATION REACTION START ===========
Molecule: CCCCCCCCC(CCBr)OCC | Bond indexes involved: (1, 0, 2)
Bond order is decreased at bond [1, 0]
Bond order is increased at bond [1, 2]
separating molecules...
Molecule [Br] is omitted due to small size
separation complete...
[RESULT] Elimination generated molecule: Br.C=CC(CCCCCCCC)OCC
=========== ELIMINATION REACTION END ===========

=========== ELIMINATION CHECK END ===========

def rxn_addition(
        donor: Chem.Mol, 
        acceptor: Chem.Mol, 
        pi_bond: tuple, 
        add_sites: tuple
    ) -> list:
    """
    Perform an electrophilic addition reaction between a pi-donor and a pi-acceptor.

    This function simulates addition reactions where a nucleophilic pi bond
    (alkene or alkyne) reacts with an electrophile. The pi bond is broken,
    and new sigma bonds are formed with the acceptor atoms. Both 
    Markovnikov and non-Markovnikov outcomes can be modeled depending
    on the indices provided by external detector functions.

    Parameters
    ----------
    donor : rdkit.Chem.Mol
        The pi-donor molecule (e.g., alkene, alkyne).
    acceptor : rdkit.Chem.Mol
        The pi-acceptor molecule (e.g., hydrogen halide, alcohol).
    pi_bond : tuple of int
        A pair `(i, j)` representing the atom indices of the donor's pi bond.
        - `i` is the carbon forming a carbocation (site of electrophilic attack).
        - `j` is the carbon attacking the acceptor nucleophile.
    add_sites : tuple of int
        A tuple `(x, y)` where:
        - `x` is the nucleophilic atom in the acceptor.
        - `y` (optional) is the electrophilic atom in the acceptor.  
          If the acceptor is monoatomic (e.g., H⁺), only `(x,)` is needed.

    Returns
    -------
    list of rdkit.Chem.Mol
        Array of resulting molecules for consistency with other reaction functions.

    Notes
    -----
    - Handles both hydrogen electrophiles (monoatomic) and polyatomic acceptors.
    - If `y` is not provided, hydrogen is assumed as the electrophile.
    - Output is wrapped in a list for consistency with substitution/elimination.
    """
    donor_rw, acceptor_rw = Chem.RWMol(donor), Chem.RWMol(acceptor)
    combined = Chem.CombineMols(donor_rw, acceptor_rw)
    offset = donor_rw.GetNumAtoms()

    nuc_pos = add_sites[0] + offset
    elec_pos = add_sites[1] + offset if len(add_sites) > 1 else None

    print("\n============ ADDITION REACTION START ============")
    print(f"[INFO] Donor (π system): {Chem.MolToSmiles(donor_rw)}")
    print(f"[INFO] Acceptor: {Chem.MolToSmiles(acceptor_rw)}")
    print(f"[DEBUG] Donor π-bond atoms: {pi_bond}, Acceptor sites: {add_sites}\n")

    # Break pi bond
    new_combined = decrease_bond(combined, pi_bond[0], pi_bond[1])

    # First bond formation (donor -> acceptor)
    new_combined = increase_bond(new_combined, nuc_pos, pi_bond[0])

    # Second bond formation if electrophile exists
    if elec_pos is not None:
        new_combined = increase_bond(new_combined, elec_pos, pi_bond[1])
        print(f"[STEP] Donor atom {pi_bond[0]} attacked electrophile at acceptor atom {elec_pos}")
    else:
        print(f"[STEP] Donor atom {pi_bond[0]} attacked electrophilic hydrogen")

    print(f"[STEP] Acceptor nucleophile atom {nuc_pos} bonded with donor atom {pi_bond[1]}")

    try:
        result = separate_mols(new_combined)
    except Exception as e:
        print(f"[ERROR] Molecule separation failed: {e}")
        return [donor, acceptor]

    print(f"[RESULT] Generated product: {Chem.MolToSmiles(new_combined)}")
    print("============ ADDITION REACTION END ============\n")

    return result


# Example
test_donor = Chem.MolFromSmiles('CCCC=C(C)C')
test_acceptor = Chem.MolFromSmiles('[Br]')
test_addition = rxn_addition(test_donor, test_acceptor, (4, 3), (0,))

Draw.MolsToGridImage(
    [test_donor, test_acceptor] + test_addition,
    subImgSize = (300, 300),
    molsPerRow = 4
)

============ ADDITION REACTION START ============
[INFO] Donor (π system): CCCC=C(C)C
[INFO] Acceptor: [Br]
[DEBUG] Donor π-bond atoms: (4, 3), Acceptor sites: (0,)

Bond order is decreased at bond [4, 3]
Bond order is increased at bond [7, 4]
[STEP] Donor atom 4 attacked electrophilic hydrogen
[STEP] Acceptor nucleophile atom 7 bonded with donor atom 3
separating molecules...
separation complete...
[RESULT] Generated product: CCCCC(C)(C)[Br]
============ ADDITION REACTION END ============

# Hydrogenation
def rxn_hydrogenation(molecule: Chem.Mol, tuple: tuple) -> list:
    """
    Perform a hydrogenation reaction on an alkene or alkyne.

    This function simulates hydrogenation by reducing a double or triple bond 
    between two specified atoms. The bond order is decreased (π bonds removed), 
    which represents the addition of hydrogen atoms across the bond. Only works 
    on alkenes and alkynes.

    Parameters
    ----------
    molecule : Chem.Mol
        The RDKit molecule to be hydrogenated.
    tuple : tuple of int
        A 2-element tuple (x, y) representing the atom indices of the double or 
        triple bond to hydrogenate. Order does not matter.

    Returns
    -------
    list of Chem.Mol
        A list containing the hydrogenated molecule.

    Notes
    -----
    - This function does not explicitly add hydrogen atoms; it reduces the bond order.
    - Input validation assumes the provided indices correspond to a C=C or C≡C bond.
    - The resulting molecule is returned as a single-element list for consistency 
      with other reaction functions.
    """

    mol = Chem.RWMol(molecule)

    print(f"\n============ HYDROGENATION REACTION START ============")
    print(f"Molecule of Interest: {Chem.MolToSmiles(mol)} || Bond Indexes: {tuple}")
    
    product = decrease_bond(mol, tuple[0], tuple[1])

    print(f"[STEP]: generated molecule {Chem.MolToSmiles(product)}")
    print(f"============ HYDROGENATION REACTION END ============\n")

    return [product]


def check_hydrogenation(mol_array: list):
    """
    Check if a molecule is eligible for hydrogenation and, if so, perform the reaction.

    This function validates whether a single input molecule contains an alkene or 
    alkyne substructure suitable for hydrogenation. If eligible, it calls 
    `rxn_hydrogenation()` with a randomly chosen double or triple bond. If not, 
    it returns `False`.

    Parameters
    ----------
    mol_array : list of Chem.Mol
        A list containing exactly one RDKit molecule.

    Returns
    -------
    list of Chem.Mol or bool
        - If eligible, a list containing the hydrogenated molecule.  
        - If not eligible, returns ``False``.

    Notes
    -----
    - Hydrogenation is restricted to molecules with double or triple bonds.  
    - Only one molecule should be provided in `mol_array`.  
    - Randomly selects one eligible bond if multiple exist.
    """
    print("\n============ HYDROGENATION CHECK START ============")

    if len(mol_array) != 1:
        print("[ERROR] more than 1 molecules are present, only 1 is needed.")
        return False
    
    eligible_bonds = filter_substructure(
        parse_molecule(mol_array[0]),
        ['alkenes', 'alkynes']
    )

    try:
        chosen_bond = random.choice(eligible_bonds)
        return rxn_hydrogenation(mol_array[0], (chosen_bond[0], chosen_bond[1]))
    except:
        print("CHECK ERROR: There are no double or triple bonds available in this molecule")
        return False
    finally:
        print(f"============ HYDROGENATION CHECK END ============\n")


# Example
test_mol = Chem.MolFromSmiles('C#CCC=C(C)C')
test_hydrogenation = check_hydrogenation([test_mol])

Draw.MolsToGridImage(
    [test_mol] + test_hydrogenation,
    subImgSize = (300, 300),
    molsPerRow = 4
)

============ HYDROGENATION CHECK START ============

PARSE LOG: molecule C#CCC=C(C)C has been parsed with tag size of 8
FILTER LOG: alkenes filtered
FILTER LOG: alkynes filtered

============ HYDROGENATION REACTION START ============
Molecule of Interest: C#CCC=C(C)C || Bond Indexes: (3, 4)
Bond order is decreased at bond [3, 4]
[STEP]: generated molecule C#CCCC(C)C
============ HYDROGENATION REACTION END ============

============ HYDROGENATION CHECK END ============

# Aldehyde and ketone reduction

def rxn_akreduction(mol_array: list, carbonyl_tuple: tuple) -> list:
    """
    Perform aldehyde/ketone reduction on a molecule by converting a C=O bond into a C-OH group.  

    The function simulates hydride reduction (e.g., NaBH4 or LiAlH4), where the carbonyl carbon
    accepts a hydride and the double bond to oxygen is reduced to a single bond, producing an alcohol.  

    Parameters
    ----------
    mol_array : list of rdkit.Chem.Mol
        A list containing exactly one RDKit molecule to undergo reduction.
    carbonyl_tuple : tuple of int
        A pair of atom indices (carbon_idx, oxygen_idx) representing the carbonyl bond (C=O) 
        to be reduced. The first index should correspond to the carbon, the second to the oxygen.  

    Returns
    -------
    list of rdkit.Chem.Mol
        A list containing the product molecule with the carbonyl reduced to a hydroxyl group.  

    Notes
    -----
    - This function assumes the input molecule has a valid carbonyl functional group.  
    - For simplicity, protonation of the oxygen to form -OH is implicit and not explicitly simulated.  
    - Only one molecule is accepted; providing multiple molecules will result in undefined behavior.  
    """
    mol = Chem.RWMol(mol_array[0])

    print(f"\n============ AK REDUCTION REACTION START ============")
    print(f"Carbonyl: {Chem.MolToSmiles(mol)} || Carbonyl Bond: {carbonyl_tuple}")

    # Decrease the C=O bond to C-O (single bond)
    product = decrease_bond(mol, carbonyl_tuple[0], carbonyl_tuple[1])
    Chem.SanitizeMol(product)

    print(f"[STEP] generated molecule {Chem.MolToSmiles(product)}")
    print(f"============ AK REDUCTION REACTION END ============\n")

    return [product]


def check_akreduction(mol_array: list):
    """
    Validate and perform aldehyde/ketone reduction if requirements are met.  

    This function checks whether the input consists of exactly two molecules:
    - One must contain a carbonyl functional group (C=O).  
    - The other must be a hydride donor (e.g., NaBH4, LiAlH4).  

    If both conditions are satisfied, it randomly selects a carbonyl group 
    from the substrate molecule and calls `rxn_akreduction()` to produce the reduced product.  

    Parameters
    ----------
    mol_array : list of rdkit.Chem.Mol
        A list of exactly two RDKit molecules. One should contain a carbonyl 
        group, and the other should be a hydride donor.  

    Returns
    -------
    list of rdkit.Chem.Mol or bool
        - If valid: returns a list containing the reduced alcohol product.  
        - If invalid: returns False.  

    Notes
    -----
    - Random selection ensures that only one carbonyl group is reduced at a time.  
    - If multiple carbonyls exist, only one is acted upon per call.  
    - Input order of carbonyl vs. hydride donor does not matter.  
    """
    print("\n============ AK REDUCTION CHECK START ============")

    if len(mol_array) != 2:
        print("CHECK ERROR: Input must be a list of 2 molecules.")
        return False

    # Parse both molecules
    tags_0 = parse_molecule(mol_array[0])
    tags_1 = parse_molecule(mol_array[1])

    # Check for carbonyl and hydride
    carbonyl_0 = filter_substructure(tags_0, ['carbonyls'])
    carbonyl_1 = filter_substructure(tags_1, ['carbonyls'])
    hydride_0 = filter_substructure(tags_0, ['nuc hydrides'])
    hydride_1 = filter_substructure(tags_1, ['nuc hydrides'])

    # Identify which molecule is carbonyl and which is hydride
    if carbonyl_0 and hydride_1:
        carbonyl_mol = mol_array[0]
        carbonyl_groups = carbonyl_0
    elif carbonyl_1 and hydride_0:
        carbonyl_mol = mol_array[1]
        carbonyl_groups = carbonyl_1
    else:
        print("CHECK ERROR: No valid carbonyl/hydride pair found.")
        return False

    # Randomly select one carbonyl group
    try:
        chosen_carbonyl = random.choice(carbonyl_groups)
        return rxn_akreduction([carbonyl_mol], chosen_carbonyl)
    except Exception as e:
        print(f"[ERROR] {e}")
    finally:
        print("============ AK REDUCTION CHECK END ============\n")


# Example
test_mol = Chem.MolFromSmiles('OCC(=O)CCO')
test_hydride = Chem.MolFromSmiles('[H-]')

test_akreduction = check_akreduction([test_hydride, test_mol])

Draw.MolsToGridImage(
    [test_mol, test_hydride] + test_akreduction,
    subImgSize = (300, 300),
    molsPerRow = 4
)

============ AK REDUCTION CHECK START ============

PARSE LOG: molecule [H-] has been parsed with tag size of 1

PARSE LOG: molecule O=C(CO)CCO has been parsed with tag size of 11
FILTER LOG: carbonyls filtered
FILTER LOG: nuc hydrides filtered

============ AK REDUCTION REACTION START ============
Carbonyl: O=C(CO)CCO || Carbonyl Bond: (2, 3)
Bond order is decreased at bond [2, 3]
[STEP] generated molecule OCCC(O)CO
============ AK REDUCTION REACTION END ============

============ AK REDUCTION CHECK END ============

[16:08:46] WARNING: not removing hydrogen atom without neighbors

# Aldehyde and ketone oxidation

def rxn_akoxidation(mol_array: list, hydroxy_tuple: tuple) -> list:
    """
    Perform aldehyde/ketone oxidation on a molecule by converting a C-OH bond into a C=O group.  

    The function simulates PCC oxidation, where the hydroxy carbon donates hydrogens
    so that the single bond to hydroxy is oxidized to a double bond, producing a carbonyl.  

    Parameters
    ----------
    mol_array : list of rdkit.Chem.Mol
        A list containing exactly one RDKit molecule to undergo reduction.
    carbonyl_tuple : tuple of int
        A pair of atom indices (carbon_idx, oxygen_idx) representing the hydroxy bond (C-OH) 
        to be oxidized. The first index should correspond to the carbon, the second to the oxygen.  

    Returns
    -------
    list of rdkit.Chem.Mol
        A list containing the product molecule with the hydroxy oxidized to a carbonyl group.  

    Notes
    -----
    - This function assumes the input molecule has a valid hydroxy functional group.  
    - For simplicity, deprotonation of the oxygen to form =O is implicit and not explicitly simulated.  
    - Only one molecule is accepted; providing multiple molecules will result in undefined behavior.  
    """
    mol = Chem.RWMol(mol_array[0])

    print(f"\n============ AK OXIDATION REACTION START ============")
    print(f"Carbonyl: {Chem.MolToSmiles(mol)} || Carbonyl Bond: {hydroxy_tuple}")

    # Decrease the C=O bond to C-O (single bond)
    product = increase_bond(mol, hydroxy_tuple[0], hydroxy_tuple[1])
    Chem.SanitizeMol(product)

    print(f"[STEP] generated molecule {Chem.MolToSmiles(product)}")
    print(f"============ AK OXIDATION REACTION END ============\n")

    return [product]

# Check function


def check_akoxidation(mol_array: list):
    """
    Validate and perform aldehyde/ketone oxidation if requirements are met.  

    This function checks whether the input consists of exactly two molecules:
    - One must contain a hydroxy functional group (C-OH).  
    - The other must be PCC.  

    If both conditions are satisfied, it randomly selects a hydroxy group 
    from the substrate molecule and calls `rxn_akoxidation()` to produce the oxidized product.  

    Parameters
    ----------
    mol_array : list of rdkit.Chem.Mol
        A list of exactly two RDKit molecules. One should contain a hydroxy 
        group, and the other should be PCC.  

    Returns
    -------
    list of rdkit.Chem.Mol or bool
        - If valid: returns a list containing the oxidized carbonyl product.  
        - If invalid: returns False.  

    Notes
    -----
    - Random selection ensures that only one hydroxy group is reduced at a time.  
    - If multiple hydroxy exist, only one is acted upon per call.  
    - Input order of hydroxy vs. PCC does not matter.  
    """
    print("\n============ AK OXIDATION CHECK START ============")

    if len(mol_array) != 2:
        print("CHECK ERROR: Input must be a list of 2 molecules.")
        return False

    # Parse both molecules
    tags_0 = parse_molecule(mol_array[0])
    tags_1 = parse_molecule(mol_array[1])

    # Check for hydroxy and pcc
    hydroxy_0 = filter_substructure(tags_0, ['alcohols'])
    hydroxy_1 = filter_substructure(tags_1, ['alcohols'])
    
    pcc = Chem.MolFromSmiles('C1=CC=[NH+]C=C1.[O-][Cr](=O)(=O)Cl')

    # Identify which molecule is carbonyl and which is hydride
    if hydroxy_0 and Chem.MolToSmiles(mol_array[1], canonical=True) == Chem.MolToSmiles(pcc, canonical=True):
        hydroxy_mol = mol_array[0]
        hydroxy_groups = hydroxy_0
    elif hydroxy_1 and Chem.MolToSmiles(mol_array[0], canonical=True) == Chem.MolToSmiles(pcc, canonical=True):
        hydroxy_mol = mol_array[1]
        hydroxy_groups = hydroxy_1
    else:
        print("CHECK ERROR: No valid hydroxy/PCC pair found.")
        return False

    # Randomly select one carbonyl group
    try:
        chosen_hydroxy = random.choice(hydroxy_groups)
        return rxn_akoxidation([hydroxy_mol], chosen_hydroxy)
    except Exception as e:
        print(f"[ERROR] {e}")
        return False
    finally:
        print("============ AK OXIDATION CHECK END ============\n")


# Example
test_mol = Chem.MolFromSmiles('OCC(=O)CCO')
test_pcc = Chem.MolFromSmiles('C1=CC=[NH+]C=C1.[O-][Cr](=O)(=O)Cl')

test_akoxidation = check_akoxidation([test_mol, test_pcc])

Draw.MolsToGridImage(
    [test_mol, test_pcc] + test_akoxidation,
    subImgSize = (300, 300),
    molsPerRow = 4
)

============ AK OXIDATION CHECK START ============

PARSE LOG: molecule O=C(CO)CCO has been parsed with tag size of 11

PARSE LOG: molecule O=[Cr](=O)([O-])Cl.c1cc[nH+]cc1 has been parsed with tag size of 0
FILTER LOG: alcohols filtered

============ AK OXIDATION REACTION START ============
Carbonyl: O=C(CO)CCO || Carbonyl Bond: (1, 0)
Bond order is increased at bond [1, 0]
[STEP] generated molecule O=CC(=O)CCO
============ AK OXIDATION REACTION END ============

============ AK OXIDATION CHECK END ============

def rxn_ozonolysis(mol_array: list, alkene_tuple: tuple) -> list:
    """
    Perform ozonolysis on an alkene, cleaving the C=C double bond into two carbonyl groups.

    Parameters
    ----------
    mol_array : list of RDKit Mol
        A list containing exactly one RDKit molecule to undergo ozonolysis.
    alkene_tuple : tuple of int
        A tuple (carbon_idx1, carbon_idx2) representing the atom indices of the C=C bond.

    Returns
    -------
    list of RDKit Mol
        A list of product molecules formed after ozonolysis, each containing a carbonyl group.

    Notes
    -----
    - This function adds two oxygen atoms and breaks the alkene bond into two carbonyl groups.
    - If ozonolysis is performed on an internal alkene, two separate carbonyl fragments will result.
    - If ozonolysis is performed on a terminal alkene, one of the fragments will be formaldehyde (H₂C=O).

    Examples
    --------
    >>> mol = Chem.MolFromSmiles("C=CC")
    >>> rxn_ozonolysis([mol], (0, 1))
    # Produces acetaldehyde and formaldehyde
    """

    mol = Chem.RWMol(mol_array[0])
    oxygen = Chem.RWMol(Chem.MolFromSmiles('O'))
    combined_mol = Chem.CombineMols(mol, oxygen)
    combined_mol = Chem.CombineMols(combined_mol, oxygen)

    offset = mol.GetNumAtoms()

    o1, o2 = offset, offset + 1

    print(f"\n============ OZONOLYSIS REACTION START ============")
    print(f"Alkene: {Chem.MolToSmiles(mol)} || Alkene Bond: {alkene_tuple}")
    print("")

    # Decrease the C=C bond first, and then form the carbonyls
    product = decrease_bond(combined_mol, alkene_tuple[0], alkene_tuple[1])
    product = decrease_bond(product, alkene_tuple[0], alkene_tuple[1])
    
    for carbon, oxygen in zip(alkene_tuple, (o1, o2)):
        product = increase_bond(product, carbon, oxygen)
        product = increase_bond(product, carbon, oxygen)

    print(f"[STEP] generated molecule {Chem.MolToSmiles(product)}")
    
    product = separate_mols(product)
    print(f"============ OZONOLYSIS REACTION END ============\n")

    return product


def check_ozonolysis(mol_array: list):
    """
    Check whether a given molecule array is eligible for ozonolysis.

    Parameters
    ----------
    mol_array : list of RDKit Mol
        A list containing exactly two molecules. One must be an alkene,
        and the other must be ozone (O3, represented as [O-][O+]=O).

    Returns
    -------
    list of RDKit Mol or bool
        - If eligible, returns the product molecules generated by `rxn_ozonolysis()`.
        - If not eligible, returns False.

    Notes
    -----
    - This function verifies that one molecule contains an alkene substructure.
    - The other molecule must match ozone by SMILES comparison.
    - If multiple alkenes are present, one is chosen at random for cleavage.

    Examples
    --------
    >>> alkene = Chem.MolFromSmiles("C=CC")
    >>> ozone = Chem.MolFromSmiles("[O-][O+]=O")
    >>> check_ozonolysis([alkene, ozone])
    # Produces a list of carbonyl-containing fragments
    """

    print("\n============ OZONOLYSIS CHECK START ============")

    if len(mol_array) != 2:
        print("CHECK ERROR: Ozonolysis requires exactly 2 reactants.")
        return False
    
    # Parse both molecules
    tags_0 = parse_molecule(mol_array[0])
    tags_1 = parse_molecule(mol_array[1])

    alkene_0 = filter_substructure(tags_0, ['alkenes'])
    alkene_1 = filter_substructure(tags_1, ['alkenes'])

    ozone = Chem.MolFromSmiles('[O-][O+]=O')

    # Identify which molecule is the alkene and which is ozone
    if alkene_0 and Chem.MolToSmiles(mol_array[1], canonical=True) == Chem.MolToSmiles(ozone, canonical=True):
        alkene_mol, alkene_groups = mol_array[0], alkene_0
    elif alkene_1 and Chem.MolToSmiles(mol_array[0], canonical=True) == Chem.MolToSmiles(ozone, canonical=True):
        alkene_mol, alkene_groups = mol_array[1], alkene_1
    else:
        print("CHECK ERROR: No valid alkene/ozone pair found.")
        return False

    try:
        chosen_alkene = random.choice(alkene_groups)
        return rxn_ozonolysis([alkene_mol], chosen_alkene)
    except Exception as e:
        print(f"[ERROR] {e}")
        return False
    finally:
        print("============ OZONOLYSIS CHECK END ============\n")


# Example
test_alkene = Chem.MolFromSmiles('[Br]CCC=C(C)(C)')
test_ozone = Chem.MolFromSmiles('[O-][O+]=O')

test_ozonolysis = check_ozonolysis([test_alkene, test_ozone])

Draw.MolsToGridImage(
    [test_alkene] + test_ozonolysis,
    subImgSize = (300, 300),
    molsPerRow = 4
)

============ OZONOLYSIS CHECK START ============

PARSE LOG: molecule CC(C)=CCCBr has been parsed with tag size of 8

PARSE LOG: molecule O=[O+][O-] has been parsed with tag size of 0
FILTER LOG: alkenes filtered

============ OZONOLYSIS REACTION START ============
Alkene: CC(C)=CCCBr || Alkene Bond: (3, 4)

Bond order is decreased at bond [3, 4]
Bond order is decreased at bond [3, 4]
Bond order is increased at bond [3, 7]
Bond order is increased at bond [3, 7]
Bond order is increased at bond [4, 8]
Bond order is increased at bond [4, 8]
[STEP] generated molecule CC(C)=O.O=CCCBr
separating molecules...
separation complete...
============ OZONOLYSIS REACTION END ============

============ OZONOLYSIS CHECK END ============

def rxn_wittig(mol_array: list, combine_tuple: tuple) -> list:
    """
    Perform a Wittig reaction between a carbonyl compound and a phosphorus ylide
    to generate an alkene and a phosphine oxide.

    Parameters
    ----------
    mol_array : list of RDKit Mol
        A list of exactly two molecules:
        - mol_array[0] : the carbonyl compound (aldehyde or ketone)
        - mol_array[1] : the phosphorus ylide (containing the carbanion and P atom)
    combine_tuple : tuple of int
        Atom indices defining the reactive centers:
        (carbonyl_carbon, carbonyl_oxygen, ylide_carbon, ylide_phosphorus)

    Returns
    -------
    list of RDKit Mol
        A list containing the separated product molecules: an alkene and a
        phosphine oxide (O=PPh3 equivalent if full ylide is included).

    Notes
    -----
    - This function simulates bond-breaking of the C=O and C-P double bonds,
      followed by bond-forming to create C=C and O=P.
    - The Wittig reaction is a cornerstone carbon-carbon bond-forming reaction
      widely used in organic synthesis to generate alkenes from aldehydes or
      ketones.

    Examples
    --------
    >>> carbonyl = Chem.MolFromSmiles("C=O")
    >>> ylide = Chem.MolFromSmiles("[CH2-][P+](C)(C)C")
    >>> rxn_wittig([carbonyl, ylide], (0, 1, 2, 3))
    # Returns an alkene and a phosphine oxide as RDKit Mol objects
    """
    carbonyl, ylide = map(Chem.RWMol, mol_array)
    combined_mol = Chem.CombineMols(carbonyl, ylide)

    offset = carbonyl.GetNumAtoms()
    c1, o1, c2, p2 = combine_tuple

    print("\n============ WITTIG REACTION START ============")
    print(f"Carbonyl: {Chem.MolToSmiles(carbonyl)} || Combine Index: ({c1}, {o1})")
    print(f"Ylide: {Chem.MolToSmiles(ylide)} || Combine Index: ({c2 + offset}, {p2 + offset})")

    # Break C=O and C–P double bonds
    for _ in range(2):
        combined_mol = decrease_bond(combined_mol, c1, o1)
        combined_mol = decrease_bond(combined_mol, c2 + offset, p2 + offset)

    # Form C=C and O=P double bonds
    for atom_a, atom_b in [(c1, c2 + offset), (o1, p2 + offset)]:
        combined_mol = increase_bond(combined_mol, atom_a, atom_b)
        combined_mol = increase_bond(combined_mol, atom_a, atom_b)

    print(f"WITTIG: generated molecule {Chem.MolToSmiles(combined_mol)}")
    product = separate_mols(combined_mol)

    print("============ WITTIG REACTION END ============\n")
    return product


def check_wittig(mol_array: list):
    """
    Check whether two molecules are eligible to undergo a Wittig reaction
    (carbonyl compound + phosphorus ylide → alkene + phosphine oxide).

    Parameters
    ----------
    mol_array : list of RDKit Mol
        A list of exactly two molecules. The pair must consist of:
        - A carbonyl compound (aldehyde or ketone)
        - A phosphorus ylide (contains carbanion adjacent to positively charged P)

    Returns
    -------
    list of RDKit Mol or bool
        - If eligible: a list of product molecules from `rxn_wittig()`
        - If not eligible: returns False

    Notes
    -----
    - Identifies carbonyl and ylide functional groups using `parse_molecule()`
      and `filter_substructure()`.
    - Randomly selects one carbonyl and one ylide reactive site to construct
      the index tuple for `rxn_wittig()`.
    - Prints debug messages during the checking process.
    """

    print("\n============ WITTIG CHECK START ============")

    if len(mol_array) != 2:
        print("CHECK ERROR: Wittig reaction requires exactly 2 reactants.")
        return False
    
    # Parse both molecules
    tags_0 = parse_molecule(mol_array[0])
    tags_1 = parse_molecule(mol_array[1])

    carbonyl_0 = filter_substructure(tags_0, ['carbonyls'])
    carbonyl_1 = filter_substructure(tags_1, ['carbonyls'])
    ylide_0 = filter_substructure(tags_0, ['ylides'])
    ylide_1 = filter_substructure(tags_1, ['ylides'])
    
    # Identify which molecule is the carbonyl and which is the ylide
    if carbonyl_0 and ylide_1:
        carbonyl_mol, ylide_mol = mol_array[0], mol_array[1]
        carbonyl_groups, ylide_groups = carbonyl_0, ylide_1
    elif carbonyl_1 and ylide_0:
        carbonyl_mol, ylide_mol = mol_array[1], mol_array[0]
        carbonyl_groups, ylide_groups = carbonyl_1, ylide_0
    else:
        print("CHECK ERROR: No valid carbonyl/ylide pair found.")
        return False

    # Randomly select a carbonyl and ylide group
    try:
        chosen_carbonyl = random.choice(carbonyl_groups)
        chosen_ylide = random.choice(ylide_groups)
        wittig_tuple = (chosen_carbonyl[0], chosen_carbonyl[1], chosen_ylide[0], chosen_ylide[1])
        return rxn_wittig([carbonyl_mol, ylide_mol], wittig_tuple)
    except Exception as e:
        print(f"[ERROR] {e}")
        return False
    finally:
        print("============ WITTIG CHECK END ============\n")


# Example
test_carbonyl = Chem.MolFromSmiles('OCC(=O)CC')
test_ylide = Chem.MolFromSmiles('[P](c1ccccc1)(c2ccccc2)(c3ccccc3)=C(CC)C')

test_wittig = check_wittig([test_carbonyl, test_ylide])

Draw.MolsToGridImage(
    [test_carbonyl, test_ylide] + test_wittig,
    subImgSize = (300, 300),
    molsPerRow = 4
)

============ WITTIG CHECK START ============

PARSE LOG: molecule CCC(=O)CO has been parsed with tag size of 11

PARSE LOG: molecule CCC(C)=P(c1ccccc1)(c1ccccc1)c1ccccc1 has been parsed with tag size of 3
FILTER LOG: carbonyls filtered
FILTER LOG: ylides filtered

============ WITTIG REACTION START ============
Carbonyl: CCC(=O)CO || Combine Index: (2, 3)
Ylide: CCC(C)=P(c1ccccc1)(c1ccccc1)c1ccccc1 || Combine Index: (25, 6)
Bond order is decreased at bond [2, 3]
Bond order is decreased at bond [25, 6]
Bond order is decreased at bond [2, 3]
Bond order is decreased at bond [25, 6]
Bond order is increased at bond [2, 25]
Bond order is increased at bond [2, 25]
Bond order is increased at bond [3, 6]
Bond order is increased at bond [3, 6]
WITTIG: generated molecule CCC(C)=C(CC)CO.O=P(c1ccccc1)(c1ccccc1)c1ccccc1
separating molecules...
separation complete...
============ WITTIG REACTION END ============

============ WITTIG CHECK END ============

def rxn_ylides(mol_array: list, position_index: tuple) -> list:
    """
    Form a phosphorus ylide from a molecule with a leaving group and triphenylphosphine (PPh3).

    Parameters
    ----------
    mol_array : list of RDKit Mol
        A list containing exactly one molecule that has at least one leaving group.
    position_index : tuple of int
        A tuple (carbon_idx, lg_idx) representing the carbon atom bonded to the 
        leaving group and the leaving group atom index.

    Returns
    -------
    list of RDKit Mol
        A list of product molecules, including the ylide and byproducts. 
        Returns the input molecule unchanged if ylide formation fails.

    Notes
    -----
    - Triphenylphosphine (PPh3) is automatically added as a reactant.
    - The function removes the leaving group and attaches PPh3 to the carbon,
      forming the phosphorus ylide.
    - For simplicity, the requirement of BuLi (deprotonation step) is ignored.
    - If the carbon bound to the leaving group is not primary or secondary, 
      the ylide cannot form and the input molecule is returned unchanged.
    - Prints debugging information during execution.
    """

    mol = Chem.RWMol(mol_array[0])
    pph3 = Chem.RWMol(Chem.MolFromSmiles('[P](c1ccccc1)(c2ccccc2)(c3ccccc3)'))

    combined = Chem.CombineMols(mol, pph3)
    offset = mol.GetNumAtoms()

    print("\n============ YLIDE FORMATION REACTION START ============")
    print(f"Molecule: {Chem.MolToSmiles(mol)} || Combine Index: {position_index}")

    product = decrease_bond(combined, position_index[0], position_index[1])
    product = increase_bond(product, position_index[0], offset)
    try:
        product = increase_bond(product, position_index[0], offset)
    except:
        print("YLIDE ERROR: the leaving group carbon needs to be primary or secondary.")
        return mol_array

    print(f"YLIDE: generated molecule {Chem.MolToSmiles(product)}")
    product = separate_mols(product)

    print("============ YLIDE FORMATION REACTION END ============\n")
    return product


def check_ylides(mol_array: list):
    """
    Check whether the given molecule pair can undergo ylide formation with PPh3.

    Parameters
    ----------
    mol_array : list of RDKit Mol
        A list containing exactly 2 molecules. One must contain a leaving group 
        (attached to a primary or secondary carbon), and the other must be triphenylphosphine (PPh3).

    Returns
    -------
    list of RDKit Mol or bool
        The result of `rxn_ylides()` if a valid leaving group/PPh3 pair is found, 
        otherwise returns False.

    Notes
    -----
    - Automatically identifies which molecule provides the leaving group and 
      which is PPh3 by comparing SMILES.
    - Randomly selects one leaving group from the eligible molecule.
    - Prints detailed logs during eligibility checks and reaction execution.
    - Returns False if:
        * The input list does not contain exactly 2 molecules.
        * No valid leaving group/PPh3 pair is found.
        * The chosen leaving group is unsuitable for ylide formation.
    """
    print("\n============ YLIDE FORMATION CHECK START ============")

    if len(mol_array) != 2:
        print("CHECK ERROR: ylide formation reaction requires exactly 2 reactants.")
        return False
    
    # Parse both molecules
    tags_0 = parse_molecule(mol_array[0])
    tags_1 = parse_molecule(mol_array[1])

    lg_0 = filter_leaving_groups(tags_0)
    lg_1 = filter_leaving_groups(tags_1)

    pph3 = Chem.MolFromSmiles('[P](c1ccccc1)(c2ccccc2)(c3ccccc3)')
    
    # Identify which molecule is the carbonyl and which is the ylide
    if lg_0 and Chem.MolToSmiles(mol_array[1], canonical=True) == Chem.MolToSmiles(pph3, canonical=True):
        lg_mol = mol_array[0]
        lg_groups = lg_0
    elif lg_1 and Chem.MolToSmiles(mol_array[0], canonical=True) == Chem.MolToSmiles(pph3, canonical=True):
        lg_mol = mol_array[1]
        lg_groups = lg_1
    else:
        print("CHECK ERROR: No valid lg/pph3 pair found.")
        return False

    # Randomly select a carbonyl and ylide group
    try:
        chosen_lg = random.choice(lg_groups)
        return rxn_ylides([lg_mol], (chosen_lg[1], chosen_lg[0]))
    except Exception as e:
        print(f"[ERROR] {e}")
        return False
    finally:
        print("============ YLIDE FORMATION CHECK END ============\n")


# Example
test_alkene = Chem.MolFromSmiles('[Br]CCC=C(C)(C)')
test_pph3 = Chem.MolFromSmiles('[P](c1ccccc1)(c2ccccc2)(c3ccccc3)')

test_ylide = check_ylides([test_alkene, test_pph3])

Draw.MolsToGridImage(
    [test_alkene, test_pph3] + test_ylide,
    subImgSize = (300, 300),
    molsPerRow = 4
)

============ YLIDE FORMATION CHECK START ============

PARSE LOG: molecule CC(C)=CCCBr has been parsed with tag size of 8

PARSE LOG: molecule c1ccc(P(c2ccccc2)c2ccccc2)cc1 has been parsed with tag size of 0
Leaving groups are being filtered...
FILTER LOG: lg halides filtered
Leaving groups are being filtered...

============ YLIDE FORMATION REACTION START ============
Molecule: CC(C)=CCCBr || Combine Index: (1, 0)
Bond order is decreased at bond [1, 0]
Bond order is increased at bond [1, 7]
Bond order is increased at bond [1, 7]
YLIDE: generated molecule Br.CC(C)=CCC=P(c1ccccc1)(c1ccccc1)c1ccccc1
separating molecules...
Molecule [Br] is omitted due to small size
separation complete...
============ YLIDE FORMATION REACTION END ============

============ YLIDE FORMATION CHECK END ============

# Construct an array that lists the check functions we need
# The list will update as we expand on Part 4

check_function_list = [
    check_substitution,
    check_elimination,
    #check_addition (WIP)
    check_akreduction,
    check_akoxidation,
    check_hydrogenation,
    check_ozonolysis,
    check_wittig,
    check_ylides
]

compound_list = [
    '[Br][Br]', '[Cl][Cl]', '[Br]', '[Cl]','O', '[H][H]',
    'C1=CC=[NH+]C=C1.[O-][Cr](=O)(=O)Cl', # PCC
    '[Na+].[BH4-]', '[Li+].[AlH4-]', # Hydride donors
    'O=C(C)O[Hg]OC(C)=O', '[BH3]', 'OO', # Oxymercuration-demercuration, hydroboration-oxidation
    'ClC1=CC(C(OO)=O)=CC=C1', #mCPBA
    'O=[Os](=O)(=O)=O', # Alkene syn dihydroxylation
    'N#[N+]-[C-]', # Cyclopropanation
    '[O-][O+]=O', # Ozonolysis
    '[Na]N', # Strong base
    'N', 'NN', 'C#N', # Amine, Hydrazine
    '[P](c1ccccc1)(c2ccccc2)(c3ccccc3)', # PPh3 for ylide formation
]

compound_list_mol = []
for compound in compound_list:
    mol = Chem.MolFromSmiles(compound)
    compound_list_mol.append(mol)
Draw.MolsToGridImage(
    compound_list_mol,
    subImgSize = (300, 300),
    molsPerRow = 5
)

# Choosing the molecules

def mda_check(
        mol_array: list, 
        num_mols: int = 0, 
        rxn_type: int = -1
    ) -> list:
    """
    Master Detection Apparatus (MDA) for Chemical Reactions.

    This function selects 1 or 2 molecules from an input array, determines which type of reaction
    they can undergo, and executes the reaction using the appropriate `check` functions.

    Parameters
    ----------
    mol_array : list
        List of RDKit molecule objects available for reaction.
    num_mols : int, optional
        Number of molecules to select for reaction.
        If 0 (default), randomly chooses 1 molecule (30% chance) or 2 molecules (70% chance).
    rxn_type : int, optional
        Specifies the reaction type to attempt:
            - 0: substitution
            - 1: elimination
            - 2: addition
            - -1: randomly select among available reaction types
        Additional reaction types can be added and referenced via `check_function_list`.

    Returns
    -------
    list
        List of RDKit molecule objects representing the reaction products.
        If no reaction is possible, returns the selected input molecules unchanged.

    Notes
    -----
    - The function prints debug information for molecule selection and reaction execution.
    - Uses the globally defined `check_function_list` to evaluate feasible reactions.
    - Randomization is used both for molecule selection and reaction parameter choices
    when `rxn_type=-1`.
    """

    print("=== MASTER DETECTION APPARATUS ===")

    # Randomly choose 1 or 2 molecules
    for index, mol in enumerate(mol_array):
        print(f"Molecule {index}: {Chem.MolToSmiles(mol)}")

    if num_mols == 0:
        number_pool = [1, 1, 1, 2, 2, 2, 2, 2, 2, 2]
        number_mol = random.choice(number_pool)
        the_chosen_ones = random.choices(mol_array, k=number_mol)
    else:
        number_mol = num_mols
        the_chosen_ones = random.choices(mol_array, k=num_mols)

    print("")
    print(f"MDA: Chosen {number_mol} molecule(s)")

    for index, mol in enumerate(the_chosen_ones):
        print(f"MDA: Chosen molecule {index}: {Chem.MolToSmiles(mol)}")
    
    # Depending on rxn_type, select the appropriate reaction type
    if rxn_type == -1: # Random

        check_results = []
        for checks in check_function_list:
            result = checks(the_chosen_ones)
            if result:
                check_results.append(result)
    
        try:
            products = random.choice(check_results)
        except:
            print("MDA ERROR: the molecules presented cannot undergo any reaction available")
            return the_chosen_ones
    
    else: # Selected type

        try:
            products = check_function_list[rxn_type](the_chosen_ones)
        except:
            print(f"MDA ERROR: The molecules cannot undergo reaction type {rxn_type}")
            return the_chosen_ones
    
    return products


# Example
test_group = [
    Chem.MolFromSmiles('CCCCC[Br]'),
    Chem.MolFromSmiles('O'),
    Chem.MolFromSmiles('[Br]CCCCCO'),
    Chem.MolFromSmiles('CC(=O)C'),
    Chem.MolFromSmiles('CCOCC'),
    mda_randomization(1, 2400, 0),
]

test_MDA = mda_check([Chem.MolFromSmiles('[Br]CCCCCO')], 1)

Draw.MolsToGridImage(
    test_group + test_MDA,
    subImgSize = (300, 300),
    molsPerRow = 4
)

--- Master Detection Apparatus: Level 6 | Budget 1 ---
------------ generated molecule CCC with 3 carbons
---ETHER---- Generated molecule: COC
  O added at position(s): [1]
  N added at position(s): []
  S added at position(s): []
=== MASTER DETECTION APPARATUS ===
Molecule 0: OCCCCCBr

MDA: Chosen 1 molecule(s)
MDA: Chosen molecule 0: OCCCCCBr

========== SUBSTITUTION CHECK START ==========
[INFO] Substitution will be intramolecular
[INFO] Parsing and filtering molecule sites...

PARSE LOG: molecule OCCCCCBr has been parsed with tag size of 8
Nucleophiles are being filtered...
FILTER LOG: nuc hydroxys filtered
Leaving groups are being filtered...
FILTER LOG: lg halides filtered
FILTER LOG: lg hydroxys filtered

========== SUBSTITUTION REACTION START ==========
Nucleophile: OCCCCCBr (attacking atom index: 6)
Electrophile: OCCCCCBr (bond to break: 1-0)
Bond order is decreased at bond [1, 0]
Bond order is increased at bond [6, 1]
[INFO] Atom 6 successfully attacked atom 1
[INFO] Intermediate product SMILES: Br.C1CCOCC1
separating molecules...
Molecule [Br] is omitted due to small size
separation complete...
=========== SUBSTITUTION REACTION END ===========

=========== SUBSTITUTION CHECK END ===========


=========== ELIMINATION CHECK END ===========
CHECK LOG: Parsing and filtering molecules

PARSE LOG: molecule OCCCCCBr has been parsed with tag size of 8
Leaving groups are being filtered...
FILTER LOG: lg halides filtered
FILTER LOG: lg hydroxys filtered

=========== ELIMINATION REACTION START ===========
Molecule: OCCCCCBr | Bond indexes involved: (5, 6, 4)
Bond order is decreased at bond [5, 6]
Bond order is increased at bond [5, 4]
separating molecules...
Molecule O is omitted due to small size
separation complete...
[RESULT] Elimination generated molecule: C=CCCCBr.O
=========== ELIMINATION REACTION END ===========

=========== ELIMINATION CHECK END ===========


============ AK REDUCTION CHECK START ============
CHECK ERROR: Input must be a list of 2 molecules.

============ AK OXIDATION CHECK START ============
CHECK ERROR: Input must be a list of 2 molecules.

============ HYDROGENATION CHECK START ============

PARSE LOG: molecule OCCCCCBr has been parsed with tag size of 8
CHECK ERROR: There are no double or triple bonds available in this molecule
============ HYDROGENATION CHECK END ============


============ OZONOLYSIS CHECK START ============
CHECK ERROR: Ozonolysis requires exactly 2 reactants.

============ WITTIG CHECK START ============
CHECK ERROR: Wittig reaction requires exactly 2 reactants.

============ YLIDE FORMATION CHECK START ============
CHECK ERROR: ylide formation reaction requires exactly 2 reactants.

/* Center content and adjust width for readability */
body {
    width: 1000px;
    margin: 0 auto;
}
/* Center content and adjust width for readability */

# Substitution legacy code

def rxn_substitution(electrophile_dict:dict, nucleophile_dict:dict, mechanism:str):
    """
    This function takes in an electrophile and a nucleophile to simulate a substitution reaction. 
    The substitution reaction includes pi-bond dissociation to form a sigma-bond with hydrogen. 
    Thus, reactions like addition will not be included.

    Mechanism will be restricted only to "basic", "acidic", and "self".
    """
    # Setting up the molecules and their tags for filtering
    elec, elec_tags = Chem.RWMol(electrophile_dict['molecule']), electrophile_dict['tags']
    nuc, nuc_tags = Chem.RWMol(nucleophile_dict['molecule']), nucleophile_dict['tags']

    if mechanism == "self":
        combined = elec
    else:
        combined = Chem.CombineMols(elec, nuc)

    offset = elec.GetNumAtoms() # Gets the index change during molecule combination

    print("")
    print(f"------------ REACTION: SUBSTITUTION ------------")
    print(f"Electrophile: {electrophile_dict['smiles']}")
    print(f"Nucleophile: {nucleophile_dict['smiles']}")
    print(f"Mechanism: {mechanism}")
    print("")

    # Filtering the atoms and randomly selecting the lottery winners
    eligible_elec_atoms, eligible_nuc_atoms = filter_leaving_groups(elec_tags), filter_nucleophiles(nuc_tags)

    try:
        selected_elec_tuple = random.choices(eligible_elec_atoms, k=1)[0]
        selected_nuc_tuple = random.choices(eligible_nuc_atoms, k=1)[0]
    except:
        print("SUBSTITUTION ERROR: no valid electrophiles or nucleophiles")
        return [electrophile_dict, nucleophile_dict]

    print("")
    print(f"Selected electrophile: atom index {selected_elec_tuple}")
    print(f"Selected nucleophile: atom index {selected_nuc_tuple[0]+offset}")

    # Substitution under way
    new_combined = decrease_bond(combined, selected_elec_tuple[0], selected_elec_tuple[1])
    
    if mechanism == "self":
        new_combined = increase_bond(new_combined, selected_nuc_tuple[0], selected_elec_tuple[1])
    else:
        new_combined = increase_bond(new_combined, selected_nuc_tuple[0]+offset, selected_elec_tuple[1])
    
    print(f"SUBSTITUTION: atom {selected_nuc_tuple[0]+offset} attacked {selected_elec_tuple[1]}")
    print(f"SUBSTITUTION: generated molecule {Chem.MolToSmiles(new_combined)}")

    frag = Chem.GetMolFrags(new_combined, asMols=True)

    # Just in case the molecule is monoatomic (excluding hydrogens), we will get rid of it
    new_molecules = []
    for mol in frag:
        if mol.GetNumAtoms() < 2:
            print(f"Molecule {Chem.MolToSmiles(mol)} is omitted due to small size")
            continue
        new_molecules.append(mol)
    
    try:
        for mol in new_molecules:
            Chem.SanitizeMol(mol)
    except:
        print("ERROR IN SANITIZATION: refer back to original molecules")
        return [electrophile_dict, nucleophile_dict]
    
    print(f"END--------- REACTION: SUBSTITUTION ---------END")
    return [parse_molecule(mol) for mol in new_molecules]


# rng_FG legacy code

def rng_FG(mol, factor, region):
    """
    Distributes molecules among different functions that add functional groups to the molecule randomly,
    
    This is where the region parameter is put to use. Depending on the region, more molecules of the same kind can be generated.
    """

    destiny_weights = {
        "halide": 100 - 40 * factor if region != 2 else 100,
        "ketone": 0 + 18 * factor if region != 3 else 150,
        "carboxy": 0 + 18 * factor if region != 4 else 150,
        "anhydride": 0 + 18 * factor if region != 5 else 150
    }

    destiny = random.choices(list(destiny_weights.keys()), destiny_weights.values(), k=1)[0]

    return {
        "halide": rng_hetero,
        "ketone": rng_ketones,
        "carboxy": rng_carboxys,
        "anhydride": rng_anhydrides,
    }.get(destiny)(mol, factor, region)

Main Category	Subcategories
1. Alkanes	1.1 Cycloalkanes
2. Alkenes	2.1 Cycloalkenes 2.2 Dienes
3. Alkynes	3.1 Cycloalkynes
4. Halides	4.1 Vicinal dihalides 4.2 Geminal dihalides
5. Alcohols	5.1 Vicinal diols 5.2 Vicinal haloalcohols
6. Aldehydes and Ketones	6.1 Enols (moved to alcohols) 6.2 Hydrazones 6.3 Hemiacetals 6.4 Acetals 6.5 Imines 6.6 Enamines (moved to amines) 6.7 Aldols 6.8 α,β-Unsaturated carbonyls 6.9 Δ-Dicarbonyls 6.10 Robinson annulation product (cyclohexene-carbonyl) 6.11 α-Hydroxyketones
7. Carboxys	7.1 Acyl Halides 7.2 Esters 7.3 Amides 7.4 Peroxycarboxys 7.5 Vicinal halocarboxys 7.6 Anhydrides 7.7 β-Ketoesters 7.8 β-Diketones
8. Ethers	8.1 Epoxides 8.2 Cyclic ethers
9. Amines	9.1 Quaternary amines 9.2 Tertiary amines 9.3 Nitros 9.4 Nitriles 9.5 Alkyl azides 9.6 N-nitrosamine

Reagent Class	Examples / Subtypes
Water	–
Halogens	Diatomic halogens, Hydrohalides, SOCl₂, PBr₃, PI₃
Hydride Donors	NaBH₄, LiAlH₄
Proton Donors	HCl, H₂SO₄, HNO₃, H₃O⁺
Organometallic Reagents	Alkyllithiums, Alkylmagnesiums (Grignard Reagents)
Oxidizing Agents	PCC, CrO₃, O₃, H₂O₂
Reducing Agents	Zn, NaBH₄, LiAlH₄, H₂, Pd, H₂N–NH₂
Mercury Catalyst	Hg²⁺
Borane Reagents	BH₃
Strong Bases	NaNH₂
Nitrosating Agents	NaNO₂
Azides	NaN₃
Cyanides	-
Ylides	-

Algorithmic Generation and Reaction Prediction in Organic Chemistry¶

Part 1: Procedural Generation of Organic Chemical Compounds¶

1.1 Introduction to RDKit and SMILES¶

1.2 Generation of Alkanes, Alkenes, and Alkynes¶

1.2.1 Branching Alkanes¶

1.2.2 Cyclized Alkanes¶

1.2.3 Alkenes and Alkynes¶

1.3 Simple Heteroatom Generation¶

1.3.1 Haloalkanes, Alcohols, Amines, and Thiols¶

1.3.2 Ethers and Sulfides¶

1.3.3 Aldehydes, Ketones, and Their Analogues¶

1.4 Compound Functional Groups¶

1.5 Aromatic and Heterocyclic Compounds¶

1.6 Complex Molecules¶

Part 2: Random Molecule Generation¶

2.1 Generation Flowchart¶

2.2 Constructing the Generator¶

2.2.1 Carbon Backbone Branching¶

2.2.2 Carbon Backbone Cyclization¶

2.2.3 Carbon Backbone Unsaturation¶

2.2.4 Heteroatom Randomization¶

2.2.5 Ether and Analogue Randomization¶

2.2.6 Ketones, Imines, and Sulfur Analogues¶

2.2.7 Carboxys, Acyl Halides, Esters, and Amides¶

2.2.8 Anhydrides¶

2.2.9 rng_*() Function Testing¶

2.3 The Master Detection Apparatus (MDA)¶

2.3.1 Experience Cutoffs, Levels, and Complexity Budgets¶

2.3.2 MDA Construction¶

2.3.3 MDA Testing¶

Part 3: Identifying and Parsing Substructures¶

3.1 Potential Substructures¶

3.2 Parsing Substructures¶

3.3 Substructure Filtering¶

Part 4: Organic Reactions¶

4.1 Substitution Generalization¶

4.2 Elimination Generalization¶

4.3 Addition Generalization¶

4.4 Hydrogenation Generalization¶

4.5 Aldehyde and Ketone Reduction and Oxidation¶

4.6 Ozonolysis and Wittig Reactions¶

Part 5: Master Detection Apparatus (MDA)¶

Appendix¶

A1. Note on Exporting the Notebook¶

A2. Development Log¶

041325-042325¶

042425-050425¶

082725-090925¶

A3. Unresolved Issues¶

2.2.9 `rng_*()` Function Testing¶