Best Python code snippet using molecule_python
molecules_to_cov.py
Source:molecules_to_cov.py
1"""2Given a molecule file (as output by CAMPAREE), generate a coverage file3"""4import argparse5import collections6import re7import numpy8parser = argparse.ArgumentParser(description="Given a molecule file (as output by CAMPAREE), generate a coverage file in the bedGraph format")9parser.add_argument("molecule_file", help="path to the input molecule file")10parser.add_argument("output_file", help="prefix for the output coverage files, two are output as output_file.forward.cov and output_file.reverse.cov")11parser.add_argument("-m", "--max_chromosome_size", help="size of largest chromosome in megabases (250 for humans, 196 for mouse), this can be an overestimate, determines memory use", default=260, type=int)12args = parser.parse_args()13cigar_re = re.compile("[\d]+[MINDSH]")14def make_chrom():15 print(f"Creating the {len(coverages)+1} chromosome")16 return numpy.zeros(args.max_chromosome_size * 1_000_000, dtype="int32")17coverages = collections.defaultdict(make_chrom)18with open(args.molecule_file) as molecule_file:19 for line in molecule_file:20 if line.startswith("#"):21 continue #Skip comment/header line22 transcript_id, chrom, start, cigar, ref_start, ref_cigar, strand, sequence = line.split('\t')23 # Really just need the ref_start, ref_cigar, chromosome and strand for coverage24 ref_start = int(ref_start)25 cigar_parts = cigar_re.findall(ref_cigar)26 reference_idx = ref_start27 for part in cigar_parts:28 match_length = int(part[:-1])29 match_type = part[-1]30 if match_type == "M":31 # Count the match and move along the reference32 coverages[chrom,strand][reference_idx:reference_idx+match_length] += 133 reference_idx += match_length34 elif match_type in "ND":35 # No match but we still move positions on the reference36 reference_idx += match_length37 else:38 # do nothing39 pass40with open(args.output_file + ".forward.cov", "w") as fwd_file, open(args.output_file + ".reverse.cov", "w") as rev_file:41 # Output headers42 fwd_header = f'track type=bedGraph name="Coverage {args.molecule_file} forward strand" description="Coverage for {args.molecule_file} forward strand" visibility=full color=0,0,255 priority=20\n'43 rev_header = f'track type=bedGraph name="Coverage {args.molecule_file} reverse strand" description="Coverage for {args.molecule_file} reverse strand" visibility=full color=255,0,0 priority=20\n'44 fwd_file.write(fwd_header)45 rev_file.write(rev_header)46 # Output the coverage of each strand of each chromosome47 # sorted alphabetically48 for chrom,strand in sorted(coverages.keys()):49 print(f"Starting chrom {chrom} strand {strand}")50 coverage = coverages[chrom,strand]51 # We allocated too much space for every chromosome and most didn't use it52 # unused is left as zeros, so we can just drop those on the end53 coverage = numpy.trim_zeros(coverage, 'b')54 print(f"\t{len(coverage)} bases to output")55 if strand == "+":56 cov_file = fwd_file57 else:58 cov_file = rev_file59 block_start = None60 block_height = None61 for i, height in enumerate(coverage):62 if block_height is None:63 # Start our first block64 block_start = i - 1 # Convert to 0-based for UCSC browser65 block_height = height66 elif block_height == height:67 # Still part of the existing block68 continue69 else:70 # Convert to 0-based for UCSC, so this gives the interval (block_start, block_end)71 # which is half-open 0 based and hence goes up to but not including the 1-based position i72 block_end = i - 173 # Don't output anything for 0s, but for everything else output the block74 if block_height != 0:75 # Write out the block to the bed file76 cov_file.write('\t'.join([chrom,77 str(block_start),78 str(block_end),79 str(block_height)]) + "\n")80 # Start a new block81 block_start = i - 1# Convert to 0-based for UCSC browser82 block_height = height83 if block_height is not None and block_height != 0:84 # Output the last block85 block_end = i - 1 # Convert to 0-based for UCSC browser86 # Write out the block to the bed file87 cov_file.write('\t'.join([chrom,88 str(block_start),89 str(block_end),...
load_mol.py
Source:load_mol.py
1from sys import stderr2from io import StringIO3import os4import numpy as np5from rdkit import Chem6from rdkit.Chem import AllChem7class MoleculeLoadException(Exception):8 def __init__(self, *args, **kwargs):9 Exception.__init__(*args, **kwargs)10def get_xyz_from_mol(mol):11 xyz = np.zeros((mol.GetNumAtoms(), 3))12 conf = mol.GetConformer()13 for i in range(conf.GetNumAtoms()):14 position = conf.GetAtomPosition(i)15 xyz[i, 0] = position.x16 xyz[i, 1] = position.y17 xyz[i, 2] = position.z18 return (xyz)19def add_hydrogens_to_mol(mol):20 """21 Add hydrogens to a molecule object22 TODO (LESWING) see if there are more flags to add here for default23 :param mol: Rdkit Mol24 :return: Rdkit Mol25 """26 molecule_file = None27 try:28 pdbblock = Chem.MolToPDBBlock(mol)29 pdb_stringio = StringIO()30 pdb_stringio.write(pdbblock)31 pdb_stringio.seek(0)32 import pdbfixer33 fixer = pdbfixer.PDBFixer(pdbfile=pdb_stringio)34 fixer.findMissingResidues()35 fixer.findMissingAtoms()36 fixer.addMissingAtoms()37 fixer.addMissingHydrogens(7.4)38 hydrogenated_io = StringIO()39 import simtk40 simtk.openmm.app.PDBFile.writeFile(fixer.topology, fixer.positions, hydrogenated_io)41 hydrogenated_io.seek(0)42 return Chem.MolFromPDBBlock(hydrogenated_io.read(), sanitize=False, removeHs=False)43 except ValueError as e:44 print('Unable to add hydrogens %s' % e, file=stderr)45 raise MoleculeLoadException(e)46 finally:47 try:48 os.remove(molecule_file)49 except (OSError, TypeError):50 pass51def compute_charges(mol):52 try:53 AllChem.ComputeGasteigerCharges(mol)54 except Exception as e:55 print('Unable to compute charges for mol', file=stderr)56 raise MoleculeLoadException(e)57 return mol58def load_molecule(molecule_file, add_hydrogens=True, calc_charges=True, sanitize=False):59 if '.mol2' in molecule_file:60 my_mol = Chem.MolFromMol2File(molecule_file, sanitize=False, removeHs=False)61 elif '.sdf' in molecule_file:62 suppl = Chem.SDMolSupplier(str(molecule_file), sanitize=False)63 my_mol = suppl[0]64 elif '.pdb' in molecule_file:65 my_mol = Chem.MolFromPDBFile(str(molecule_file), sanitize=False, removeHs=False)66 else:67 raise ValueError('Unrecognized file type')68 if my_mol is None:69 raise ValueError('Unable to read non None Molecule Object')70 if add_hydrogens or calc_charges:71 my_mol = add_hydrogens_to_mol(my_mol)72 if sanitize:73 Chem.SanitizeMol(my_mol)74 if calc_charges:75 compute_charges(my_mol)76 xyz = get_xyz_from_mol(my_mol)...
test_predictor.py
Source:test_predictor.py
1from pathlib import Path2from click.testing import CliRunner3import bondnet4from bondnet.prediction.predictor import (5 predict_single_molecule,6 predict_multiple_molecules,7 predict_by_reactions,8)9from bondnet.scripts.predict_cli import cli10def test_predict_single_molecule():11 predict_single_molecule(model_name="bdncm", molecule="CC")12 predict_single_molecule(model_name="pubchem", molecule="CC")13def test_predict_multiple_molecules():14 prefix = Path(bondnet.__file__).parent.joinpath("scripts", "examples", "predict")15 molecule_file = prefix.joinpath("molecules.sdf")16 charge_file = prefix.joinpath("charges.txt")17 predict_multiple_molecules(18 model_name="bdncm",19 molecule_file=molecule_file,20 charge_file=charge_file,21 out_file="/tmp/bde.sdf",22 format="sdf",23 )24 predict_multiple_molecules(25 model_name="pubchem",26 molecule_file=molecule_file,27 charge_file=None,28 out_file="/tmp/bde.sdf",29 format="sdf",30 )31def test_predict_by_reaction():32 prefix = Path(bondnet.__file__).parent.joinpath("scripts", "examples", "predict")33 molecule_file = prefix.joinpath("molecules.sdf")34 rxn_file = prefix.joinpath("reactions.csv")35 charge_file = prefix.joinpath("charges.txt")36 predict_by_reactions(37 model_name="bdncm",38 molecule_file=molecule_file,39 reaction_file=rxn_file,40 charge_file=charge_file,41 out_file="/tmp/bde.csv",42 format="sdf",43 )44 predict_by_reactions(45 model_name="bdncm",46 molecule_file=molecule_file,47 reaction_file=rxn_file,48 charge_file=None,49 out_file="/tmp/bde.csv",50 format="sdf",51 )52def test_cli():53 runner = CliRunner()54 result = runner.invoke(cli, ["single", "CC", "0"])55 molecule_file = Path(bondnet.__file__).parent.joinpath(56 "prediction", "examples", "molecules.sdf"57 )...
Learn to execute automation testing from scratch with LambdaTest Learning Hub. Right from setting up the prerequisites to run your first automation test, to following best practices and diving deeper into advanced test scenarios. LambdaTest Learning Hubs compile a list of step-by-step guides to help you be proficient with different test automation frameworks i.e. Selenium, Cypress, TestNG etc.
You could also refer to video tutorials over LambdaTest YouTube channel to get step by step demonstration from industry experts.
Get 100 minutes of automation test minutes FREE!!