-
Notifications
You must be signed in to change notification settings - Fork 208
Open
Description
https://github.com/lantunes/skipatom
Intended usage:
from pymatgen.core.composition import Composition
compositions = [Composition("Al2O3"), Composition("CeCoGe3")]
from matminer.featurizers.composition.composite import ElementProperty
ep = ElementProperty.from_preset("skipatom")
comp_fingerprints = ep.featurize_many(compositions)
I think the following would be a good example to follow:
matminer/matminer/utils/data.py
Lines 386 to 411 in 886524a
class MatscholarElementData(AbstractData):
    """
    Element data source backed by Matscholar word-embedding vectors.

    The embeddings are learned representations of the elements, produced with
    NLP + neural-network techniques from more than 3 million scientific
    abstracts. They are taken from:

    Tshitoyan, V., Dagdelen, J., Weston, L. et al. Unsupervised word embeddings
    capture latent knowledge from materials science literature. Nature 571,
    95–98 (2019). https://doi.org/10.1038/s41586-019-1335-8

    Attributes:
        prop_names (list of str): the property names "embedding 1" through
            "embedding 200", one per embedding component.
        all_element_data (dict): maps each key of the JSON file (presumably an
            element symbol) to a dict of {property name: embedding component}.
    """

    def __init__(self):
        # Embeddings are read from a JSON file stored relative to module_dir.
        embedding_path = os.path.join(module_dir, "data_files/matscholar_els.json")
        with open(embedding_path) as handle:
            raw_vectors = json.load(handle)

        # Property names are 1-indexed: "embedding 1" ... "embedding 200".
        self.prop_names = [f"embedding {idx}" for idx in range(1, 201)]

        # Pair every component of a vector with its property name; zip stops
        # at the shorter of the two sequences.
        self.all_element_data = {
            symbol: dict(zip(self.prop_names, vector)) for symbol, vector in raw_vectors.items()
        }

    def get_elemental_property(self, elem, property_name):
        """
        Look up a single embedding component for an element.

        Args:
            elem: element to look up; it is converted with str() before being
                used as the lookup key.
            property_name (str): one of the names in self.prop_names.

        Returns:
            The stored value for that element and property.

        Raises:
            KeyError: if the element or the property name is not present.
        """
        per_element = self.all_element_data[str(elem)]
        return per_element[property_name]
Followed by the appropriate incorporation into:
matminer/matminer/featurizers/composition/composite.py
Lines 18 to 244 in 886524a
class ElementProperty(BaseFeaturizer):
    """
    Class to calculate elemental property attributes.

    To initialize quickly, use the from_preset() method.

    Features: Based on the statistics of the data_source chosen, computed
    by element stoichiometry. The format generally is:

    "{data source} {statistic} {property}"

    For example:

    "PymatgenData range X"  # Range of electronegativity from Pymatgen data

    For a list of all statistics, see the PropertyStats documentation; for a
    list of all attributes available for a given data_source, see the
    documentation for the data sources (e.g., PymatgenData, MagpieData,
    MatscholarElementData, etc.).

    Args:
        data_source (AbstractData or str): source from which to retrieve
            element property data (or use str for preset: "pymatgen",
            "magpie", "deml", "matscholar_el", or "megnet_el")
        features (list of strings): List of elemental properties to use
            (these must be supported by data_source)
        stats (list of strings): a list of weighted statistics to compute for
            each property (see PropertyStats for available stats)
    """

    def __init__(self, data_source, features, stats):
        # Recognized preset strings and the data class each one selects.
        preset_sources = {
            "pymatgen": PymatgenData,
            "magpie": MagpieData,
            "deml": DemlData,
            "matscholar_el": MatscholarElementData,
            "megnet_el": MEGNetElementData,
        }
        if isinstance(data_source, str) and data_source in preset_sources:
            self.data_source = preset_sources[data_source]()
        else:
            # Anything else (including an unrecognized string) is stored
            # unchanged and is expected to behave like an AbstractData object.
            self.data_source = data_source

        self.features = features
        self.stats = stats
        # Initialize stats computer
        self.pstats = PropertyStats()

    @classmethod
    def from_preset(cls, preset_name):
        """
        Return ElementProperty from a preset string

        Args:
            preset_name: (str) can be one of "magpie", "deml", "matminer",
                "matscholar_el", or "megnet_el".

        Returns:
            ElementProperty based on the preset name.

        Raises:
            ValueError: if preset_name is not one of the supported presets.
        """
        if preset_name == "magpie":
            data_source = "magpie"
            features = [
                "Number",
                "MendeleevNumber",
                "AtomicWeight",
                "MeltingT",
                "Column",
                "Row",
                "CovalentRadius",
                "Electronegativity",
                "NsValence",
                "NpValence",
                "NdValence",
                "NfValence",
                "NValence",
                "NsUnfilled",
                "NpUnfilled",
                "NdUnfilled",
                "NfUnfilled",
                "NUnfilled",
                "GSvolume_pa",
                "GSbandgap",
                "GSmagmom",
                "SpaceGroupNumber",
            ]
            stats = ["minimum", "maximum", "range", "mean", "avg_dev", "mode"]

        elif preset_name == "deml":
            data_source = "deml"
            stats = ["minimum", "maximum", "range", "mean", "std_dev"]
            features = [
                "atom_num",
                "atom_mass",
                "row_num",
                "col_num",
                "atom_radius",
                "molar_vol",
                "heat_fusion",
                "melting_point",
                "boiling_point",
                "heat_cap",
                "first_ioniz",
                "electronegativity",
                "electric_pol",
                "GGAU_Etot",
                "mus_fere",
                "FERE correction",
            ]

        elif preset_name == "matminer":
            data_source = "pymatgen"
            stats = ["minimum", "maximum", "range", "mean", "std_dev"]
            features = [
                "X",
                "row",
                "group",
                "block",
                "atomic_mass",
                "atomic_radius",
                "mendeleev_no",
                "electrical_resistivity",
                "velocity_of_sound",
                "thermal_conductivity",
                "melting_point",
                "bulk_modulus",
                "coefficient_of_linear_thermal_expansion",
            ]

        elif preset_name == "matscholar_el":
            # Build the data object once and hand that same instance to the
            # constructor, so the data source is not constructed a second
            # time inside __init__.
            data_source = MatscholarElementData()
            stats = ["minimum", "maximum", "range", "mean", "std_dev"]
            features = data_source.prop_names

        elif preset_name == "megnet_el":
            # Same single-instance approach as for "matscholar_el".
            data_source = MEGNetElementData()
            stats = ["minimum", "maximum", "range", "mean", "std_dev"]
            features = data_source.prop_names

        else:
            raise ValueError("Invalid preset_name specified!")

        return cls(data_source, features, stats)

    def featurize(self, comp):
        """
        Get elemental property attributes

        Args:
            comp: Pymatgen composition object

        Returns:
            all_attributes: Specified property statistics of features, ordered
                feature by feature and, within each feature, in the order of
                self.stats (the same order as feature_labels()).
        """
        # Get the element names and fractions
        elements, fractions = zip(*comp.element_composition.items())

        all_attributes = []
        for attr in self.features:
            elem_data = [self.data_source.get_elemental_property(e, attr) for e in elements]
            # One value per requested statistic; fractions are passed along as
            # the weights for the statistic.
            all_attributes.extend(self.pstats.calc_stat(elem_data, stat, fractions) for stat in self.stats)

        return all_attributes

    def feature_labels(self):
        """
        Return the label of every feature produced by featurize().

        Returns:
            list of str: labels of the form "{data source class name}
                {statistic} {property}", ordered by property and then by
                statistic.
        """
        # The source name does not depend on the feature, so compute it once.
        src = self.data_source.__class__.__name__
        return [f"{src} {stat} {attr}" for attr in self.features for stat in self.stats]

    def citations(self):
        """
        Return BibTeX citations for the data source in use.

        Returns:
            list of str: citation entries selected by the class name of
                self.data_source; an empty list if the data source is not one
                of the recognized classes.
        """
        source_name = self.data_source.__class__.__name__

        if source_name == "MagpieData":
            citation = [
                "@article{ward_agrawal_choudary_wolverton_2016, title={A general-purpose "
                "machine learning framework for predicting properties of inorganic materials}, "
                "volume={2}, DOI={10.1038/npjcompumats.2017.28}, number={1}, journal={npj "
                "Computational Materials}, author={Ward, Logan and Agrawal, Ankit and Choudhary, "
                "Alok and Wolverton, Christopher}, year={2016}}"
            ]
        elif source_name == "DemlData":
            citation = [
                "@article{deml_ohayre_wolverton_stevanovic_2016, title={Predicting density "
                "functional theory total energies and enthalpies of formation of metal-nonmetal "
                "compounds by linear regression}, volume={47}, DOI={10.1002/chin.201644254}, "
                "number={44}, journal={ChemInform}, author={Deml, Ann M. and Ohayre, Ryan and "
                "Wolverton, Chris and Stevanovic, Vladan}, year={2016}}"
            ]
        elif source_name == "PymatgenData":
            citation = [
                "@article{Ong2013, author = {Ong, Shyue Ping and Richards, William Davidson and Jain, Anubhav and Hautier, "
                "Geoffroy and Kocher, Michael and Cholia, Shreyas and Gunter, Dan and Chevrier, Vincent L. and Persson, "
                "Kristin A. and Ceder, Gerbrand}, doi = {10.1016/j.commatsci.2012.10.028}, issn = {09270256}, "
                "journal = {Computational Materials Science}, month = {feb}, pages = {314--319}, "
                "publisher = {Elsevier B.V.}, title = {{Python Materials Genomics (pymatgen): A robust, open-source python "
                "library for materials analysis}}, url = {http://linkinghub.elsevier.com/retrieve/pii/S0927025612006295}, "
                "volume = {68}, year = {2013} } "
            ]
        elif source_name == "MatscholarElementData":
            # Publication named in the MatscholarElementData docstring as the
            # origin of the embeddings.
            citation = [
                "@article{tshitoyan_dagdelen_weston_2019, title={Unsupervised word embeddings "
                "capture latent knowledge from materials science literature}, volume={571}, "
                "DOI={10.1038/s41586-019-1335-8}, journal={Nature}, author={Tshitoyan, V. and "
                "Dagdelen, J. and Weston, L. and others}, pages={95--98}, year={2019}}"
            ]
        elif source_name == "MEGNetElementData":
            # TODO: Cite MEGNet publication (not preprint) once released!
            citation = [
                "@ARTICLE{2018arXiv181205055C,"
                "author = {{Chen}, Chi and {Ye}, Weike and {Zuo}, Yunxing and {Zheng}, Chen and {Ong}, Shyue Ping},"
                "title = '{Graph Networks as a Universal Machine Learning Framework for Molecules and Crystals}',"
                "journal = {arXiv e-prints},"
                "keywords = {Condensed Matter - Materials Science, Physics - Computational Physics},"
                "year = '2018',"
                "month = 'Dec',"
                "eid = {arXiv:1812.05055},"
                "pages = {arXiv:1812.05055},"
                "archivePrefix = {arXiv},"
                "eprint = {1812.05055},"
                "primaryClass = {cond-mat.mtrl-sci},"
                r"adsurl = {https://ui.adsabs.harvard.edu/\#abs/2018arXiv181205055C},"
                "adsnote = {Provided by the SAO/NASA Astrophysics Data System}}"
            ]
        else:
            citation = []

        return citation

    def implementors(self):
        """Return the names of the people who implemented this featurizer."""
        return ["Jiming Chen", "Logan Ward", "Anubhav Jain", "Alex Dunn"]
Metadata
Metadata
Assignees
Labels
No labels