Source code for ilustrado.generation

""" This file implements the Generation class which
is used to store each generation of structures, and to
evaluate their fitness.
"""

import json

from matador.utils.chem_utils import get_formula_from_stoich
from matador.fingerprints.pdf import PDF


class Generation:
    """ Stores each generation of structures.

    Parameters:
        run_hash (str): hash for this GA run, used as the prefix of
            the JSON dump filenames,
        generation_idx (int): index of this generation,
        num_survivors (int): number of structures to aim for per
            generation (stored as `_num_survivors`),
        num_accepted (int): number to accept from this generation,
            i.e. excluding elites,

    Keyword Arguments:
        populace (list(dict)): initial structures to populate
            generation with (optional). The list is stored as-is,
            not copied.
        dumpfile (str): dumpfile name for this generation (optional);
            if given, it is loaded and replaces `populace`.
        fitness_calculator: object exposing an `evaluate(generation)`
            method, called by `rank` (presumably selected by a metric
            name such as 'hull' elsewhere -- confirm against caller).

    """

    # Bounds used by `clean`: structures whose formation enthalpy per atom
    # lies outside the open interval (min, max) are discarded.
    # NOTE(review): units are presumably eV/atom -- confirm.
    _MIN_FORMATION_ENTHALPY = -3.5
    _MAX_FORMATION_ENTHALPY = 1

    # Building blocks of the summary table printed by `__str__`.
    _TABLE_RULE = 84 * "─" + "\n"
    _TABLE_HEADER = "{:^10} {:^10} {:^25} {:^35}\n".format(
        "Formula", "Fitness", "Hull distance (eV/atom)", "ID"
    )
    _TABLE_ROW = "{:^10} {: ^10.5f} {:^25.5f} {:^35}\n"

    def __init__(
        self,
        run_hash: str,
        generation_idx: int,
        num_survivors: int,
        num_accepted: int,
        populace=None,
        dumpfile=None,
        fitness_calculator=None,
    ):
        self.populace = []
        if populace is not None:
            self.populace = populace
        self._num_survivors = num_survivors
        self._num_accepted = num_accepted
        self._fitness_calculator = fitness_calculator
        self.run_hash = run_hash
        self.generation_idx = generation_idx
        self.bourgeoisie = []
        # a dumpfile takes precedence over any populace passed in
        if dumpfile is not None:
            self.load(dumpfile)

    def __len__(self):
        """ Return the number of structures in the populace. """
        return len(self.populace)

    def __str__(self):
        """ Return a summary of the generation: member/survivor counts
        followed by one table for the populace and one for the
        bourgeoisie.

        """
        lines = [
            "\nCompleted generation {}:\n".format(self.generation_idx),
            "Number of members: {}\n".format(len(self.populace)),
            "Number of survivors: {}\n".format(len(self.bourgeoisie)),
        ]
        lines.extend(self._format_table("Populace:\n", self.populace))
        # only the populace table is closed with a rule; the bourgeoisie
        # table is followed by a blank line instead
        lines.append(self._TABLE_RULE)
        lines.extend(self._format_table("Bourgeoisie:\n", self.bourgeoisie))
        lines.append("\n")
        return "".join(lines)

    def _format_table(self, title, members):
        """ Return the lines (title, rule, header, rule, one row per
        member) of a summary table for the given structures.

        """
        lines = [title, self._TABLE_RULE, self._TABLE_HEADER, self._TABLE_RULE]
        for member in members:
            # ID is the basename of the first source file, minus extension
            identifier = (
                member["source"][0]
                .split("/")[-1]
                .replace(".res", "")
                .replace(".castep", "")
            )
            lines.append(
                self._TABLE_ROW.format(
                    get_formula_from_stoich(member["stoichiometry"]),
                    member["fitness"],
                    member["raw_fitness"],
                    identifier,
                )
            )
        return lines

    def __getitem__(self, key):
        """ Return the structure at position `int(key)` of the populace. """
        return self.populace[int(key)]

    def __iter__(self):
        """ Iterate over the structures in the populace. """
        return iter(self.populace)

    def dump(self, gen_suffix):
        """ Dump the current generation to JSON file.

        Parameters:
            gen_suffix (str): suffix inserted after "-gen" in the
                filename "<run_hash>-gen<gen_suffix>.json".

        """
        fname = "{}-gen{}.json".format(self.run_hash, gen_suffix)
        with open(fname, "w", encoding="utf-8") as f:
            json.dump(self.populace, f, sort_keys=False, indent=2)

    def dump_bourgeoisie(self, gen_suffix):
        """ Dump the current generation's bourgeoisie to JSON file.

        Parameters:
            gen_suffix (str): suffix inserted after "-gen" in the
                filename "<run_hash>-gen<gen_suffix>-bourgeoisie.json".

        """
        fname = "{}-gen{}-bourgeoisie.json".format(self.run_hash, gen_suffix)
        with open(fname, "w", encoding="utf-8") as f:
            json.dump(self.bourgeoisie, f, sort_keys=False, indent=2)

    def load(self, gen_fname):
        """ Load populace of the generation from a JSON dump,
        replacing the current populace.

        Parameters:
            gen_fname (str): filename to load.

        """
        with open(gen_fname, mode="r", encoding="utf-8") as f:
            self.populace = json.load(f)

    def load_bourgeoisie(self, bourge_fname):
        """ Load bourgeoisie of the generation from a JSON dump,
        replacing the current bourgeoisie.

        Parameters:
            bourge_fname (str): filename to load.

        """
        with open(bourge_fname, mode="r", encoding="utf-8") as f:
            self.bourgeoisie = json.load(f)

    def birth(self, populum: dict):
        """ Add a structure to the populace.

        Parameters:
            populum (dict): structure to add.

        """
        self.populace.append(populum)

    def rank(self):
        """ Evaluate the fitness of all structures in the generation
        by passing this generation to the fitness calculator.

        """
        self._fitness_calculator.evaluate(self)

    def clean(self):
        """ Remove structures with pathological formation enthalpies,
        i.e. those not strictly between -3.5 and 1 per atom.

        Returns:
            num_removed (int): number of pathological structures removed.

        """
        init_len = len(self.populace)
        self.populace = [
            populum
            for populum in self.populace
            if self._MIN_FORMATION_ENTHALPY
            < populum["formation_enthalpy_per_atom"]
            < self._MAX_FORMATION_ENTHALPY
        ]
        return init_len - len(self.populace)

    def set_bourgeoisie(self, elites=None, best_from_stoich=True):
        """ Set the structures that will continue to the next generation,
        i.e. the bourgeoisie.

        Keyword Arguments:
            elites (list(dict)): list of elite structures to include from
                the previous generation; appended after everything else.
            best_from_stoich (bool): whether to include the fittest
                structure from each stoichiometry in the populace.

        """
        # first populate with best precomputed "num_accepted" structures,
        # where "num_accepted" takes into account the number of elites
        self.bourgeoisie = sorted(
            self.populace, key=lambda member: member["fitness"], reverse=True
        )[: self._num_accepted]

        if best_from_stoich:
            # find the fittest structure from each stoichiometry sampled;
            # only real structures are ever stored, so a placeholder can
            # never leak into the bourgeoisie. Ties keep the first seen.
            best_from_stoichs = {}
            for struc in self.populace:
                stoich = get_formula_from_stoich(sorted(struc["stoichiometry"]))
                incumbent = best_from_stoichs.get(stoich)
                if incumbent is None or incumbent["fitness"] < struc["fitness"]:
                    best_from_stoichs[stoich] = struc

            # if it is not already included, prepend the best structure
            # from each stoichiometry.
            # NOTE(review): an earlier comment described this as an exchange
            # for the least fit structure already included, but nothing is
            # removed, so the bourgeoisie may exceed num_accepted -- confirm
            # which behaviour is intended.
            for champion in best_from_stoichs.values():
                if champion not in self.bourgeoisie:
                    self.bourgeoisie.insert(0, champion)

        if elites is not None:
            self.bourgeoisie.extend(elites)

    def calc_pdfs(self):
        """ Compute PDFs for each structure in the generation, storing
        them alongside each structure's sorted stoichiometry.

        """
        self._pdfs = []
        self._stoichs = []
        for structure in self.populace:
            self._pdfs.append(PDF(structure, projected=True))
            self._stoichs.append(sorted(structure["stoichiometry"]))

    def is_dupe(self, doc, sim_tol=5e-2, extra_pdfs=None):
        """ Compare doc with all other structures at same stoichiometry
        via PDF overlap.

        Parameters:
            doc (dict): structure to compare.

        Keyword Arguments:
            sim_tol (float): similarity tolerance to compare to; a
                distance strictly below this counts as a duplicate.
            extra_pdfs (list(dict)): list of structures with extra pdfs
                to compare against, each holding its PDF under "pdf".

        Returns:
            bool: True if any compared PDF is closer than `sim_tol`.

        """
        new_pdf = PDF(doc, projected=True)
        doc_stoich = sorted(doc["stoichiometry"])

        # `self.pdfs` is evaluated first so that `_stoichs` exists
        for pdf, stoich in zip(self.pdfs, self._stoichs):
            if doc_stoich == stoich:
                dist = new_pdf.get_sim_distance(pdf, projected=True)
                if dist < sim_tol:
                    return True

        if extra_pdfs is not None:
            for _doc in extra_pdfs:
                pdf = _doc["pdf"]
                if doc_stoich == sorted(_doc["stoichiometry"]):
                    dist = new_pdf.get_sim_distance(pdf, projected=pdf.projected)
                    if dist < sim_tol:
                        return True

        return False

    @property
    def pdfs(self):
        """ Returns list of PDFs for generation, calculating if necessary.

        The PDFs are computed once and cached; they are not refreshed
        when the populace changes unless `calc_pdfs` is called again.

        """
        if not hasattr(self, "_pdfs"):
            self.calc_pdfs()
        return self._pdfs

    @property
    def fitnesses(self):
        """ Return list of normalised fitnesses for population. """
        return [populum["fitness"] for populum in self.populace]

    @property
    def raw_fitnesses(self):
        """ Return list of raw fitnesses for population. """
        return [populum["raw_fitness"] for populum in self.populace]

    @staticmethod
    def _mean_fitness(members):
        """ Return the mean "fitness" of the given structures,
        or 0 if there are none.

        """
        population = len(members)
        average_fitness = 0
        for populum in members:
            average_fitness += populum["fitness"] / population
        return average_fitness

    @property
    def average_pleb_fitness(self):
        """ Return the average normalised fitness of the generation. """
        return self._mean_fitness(self.populace)

    @property
    def average_bourgeois_fitness(self):
        """ Return the average normalised fitness of the bourgeoisie. """
        return self._mean_fitness(self.bourgeoisie)