18 #include "isoSpec++.h"
24 #include <unordered_map>
37 #include "dirtyAllocator.h"
38 #include "operators.h"
40 #include "marginalTrek++.h"
42 #include "element_tables.h"
53 isotopeNumbers(new int[0]),
54 atomCounts(new int[0]),
57 marginals(new Marginal*[0])
63 const int* _isotopeNumbers,
64 const int* _atomCounts,
65 const double*
const * _isotopeMasses,
66 const double*
const * _isotopeProbabilities
69 dimNumber(_dimNumber),
70 isotopeNumbers(array_copy<int>(_isotopeNumbers, _dimNumber)),
71 atomCounts(array_copy<int>(_atomCounts, _dimNumber)),
72 confSize(_dimNumber * sizeof(int)),
76 for(
int ii = 0; ii < dimNumber; ++ii)
77 allDim += isotopeNumbers[ii];
79 std::unique_ptr<double[]> masses(
new double[allDim]);
80 std::unique_ptr<double[]> probs(
new double[allDim]);
83 for(
int ii = 0; ii < dimNumber; ++ii)
84 for(
int jj = 0; jj < isotopeNumbers[ii]; ++jj)
86 masses[idx] = _isotopeMasses[ii][jj];
87 probs[idx] = _isotopeProbabilities[ii][jj];
94 setupMarginals(masses.get(), probs.get());
98 delete[] isotopeNumbers;
103 isotopeNumbers =
nullptr;
104 atomCounts =
nullptr;
111 const int* _isotopeNumbers,
112 const int* _atomCounts,
113 const double* _isotopeMasses,
114 const double* _isotopeProbabilities
117 dimNumber(_dimNumber),
118 isotopeNumbers(array_copy<int>(_isotopeNumbers, _dimNumber)),
119 atomCounts(array_copy<int>(_atomCounts, _dimNumber)),
120 confSize(_dimNumber * sizeof(int)),
125 setupMarginals(_isotopeMasses, _isotopeProbabilities);
141 disowned(other.disowned),
142 dimNumber(other.dimNumber),
143 isotopeNumbers(other.isotopeNumbers),
144 atomCounts(other.atomCounts),
145 confSize(other.confSize),
146 allDim(other.allDim),
147 marginals(other.marginals)
149 other.disowned =
true;
153 Iso::Iso(
const Iso& other,
bool fullcopy) :
155 dimNumber(other.dimNumber),
156 isotopeNumbers(fullcopy ? array_copy<int>(other.isotopeNumbers, dimNumber) : other.isotopeNumbers),
157 atomCounts(fullcopy ? array_copy<int>(other.atomCounts, dimNumber) : other.atomCounts),
158 confSize(other.confSize),
159 allDim(other.allDim),
160 marginals(fullcopy ? new
Marginal*[dimNumber] : other.marginals)
183 return Iso(dimNr, aa_isotope_numbers,
atomCounts, use_nominal_masses ? aa_elem_nominal_masses : aa_elem_masses, aa_elem_probabilities);
186 inline void Iso::setupMarginals(
const double* _isotopeMasses,
const double* _isotopeProbabilities)
198 &_isotopeProbabilities[
allDim],
232 bool Iso::doMarginalsNeedSorting()
const
234 int nontrivial_marginals = 0;
238 nontrivial_marginals++;
239 if(nontrivial_marginals > 1)
249 mass +=
marginals[ii]->getLightestConfMass();
257 mass +=
marginals[ii]->getHeaviestConfMass();
265 mass +=
marginals[ii]->getMonoisotopicConfMass();
273 lprob +=
marginals[ii]->getSmallestLProb();
310 Iso::Iso(
const char* formula,
bool use_nominal_masses) :
315 std::vector<double> isotope_masses;
316 std::vector<double> isotope_probabilities;
320 setupMarginals(isotope_masses.data(), isotope_probabilities.data());
324 void Iso::addElement(
int atomCount,
int noIsotopes,
const double* isotopeMasses,
const double* isotopeProbabilities)
326 Marginal* m =
new Marginal(isotopeMasses, isotopeProbabilities, noIsotopes, atomCount);
349 double log_R2 = log(InverseChiSquareCDF2(K, target_total_prob));
352 priorities[ii] =
marginals[ii]->getLogSizeEstimate(log_R2);
355 unsigned int parse_formula(
const char* formula, std::vector<double>& isotope_masses, std::vector<double>& isotope_probabilities,
int** isotopeNumbers,
int** atomCounts,
unsigned int* confSize,
bool use_nominal_masses)
358 size_t slen = strlen(formula);
362 std::vector<std::pair<const char*, size_t> > elements;
363 std::vector<int> numbers;
366 throw std::invalid_argument(
"Invalid formula: can't be empty");
368 if(!isdigit(formula[slen-1]))
369 throw std::invalid_argument(
"Invalid formula: every element must be followed by a number - write H2O1 and not H2O for water");
371 for(
size_t ii = 0; ii < slen; ii++)
372 if(!isdigit(formula[ii]) && !isalpha(formula[ii]))
373 throw std::invalid_argument(
"Invalid formula: contains invalid (non-digit, non-alpha) character");
377 while(position < slen)
379 size_t elem_end = position;
380 while(isalpha(formula[elem_end]))
382 size_t digit_end = elem_end;
383 while(isdigit(formula[digit_end]))
385 elements.emplace_back(&formula[position], elem_end-position);
386 numbers.push_back(std::stoi(&formula[elem_end]));
387 position = digit_end;
390 std::vector<int> element_indexes;
392 for (
unsigned int i = 0; i < elements.size(); i++)
395 for(
int j = 0; j < ISOSPEC_NUMBER_OF_ISOTOPIC_ENTRIES; j++)
397 if ((strlen(elem_table_symbol[j]) == elements[i].second) && (strncmp(elements[i].first, elem_table_symbol[j], elements[i].second) == 0))
404 throw std::invalid_argument(
"Invalid formula");
405 element_indexes.push_back(idx);
408 std::vector<int> _isotope_numbers;
409 const double* masses = use_nominal_masses ? elem_table_massNo : elem_table_mass;
411 for(std::vector<int>::iterator it = element_indexes.begin(); it != element_indexes.end(); ++it)
415 int elem_ID = elem_table_ID[at_idx];
416 while(at_idx < ISOSPEC_NUMBER_OF_ISOTOPIC_ENTRIES && elem_table_ID[at_idx] == elem_ID)
418 isotope_masses.push_back(masses[at_idx]);
419 isotope_probabilities.push_back(elem_table_probability[at_idx]);
423 _isotope_numbers.push_back(num);
426 const unsigned int dimNumber = elements.size();
428 *isotopeNumbers = array_copy<int>(_isotope_numbers.data(), dimNumber);
429 *atomCounts = array_copy<int>(numbers.data(), dimNumber);
430 *confSize = dimNumber *
sizeof(int);
444 mode_lprob(getModeLProb()),
445 partialLProbs(alloc_partials ? new double[dimNumber+1] : nullptr),
446 partialMasses(alloc_partials ? new double[dimNumber+1] : nullptr),
447 partialProbs(alloc_partials ? new double[dimNumber+1] : nullptr)
476 static const double minsqrt = -1.3407796239501852e+154;
478 IsoThresholdGenerator::IsoThresholdGenerator(
Iso&& iso,
double _threshold,
bool _absolute,
int tabSize,
int hashSize,
bool reorder_marginals)
480 Lcutoff(_threshold <= 0.0 ? minsqrt : (_absolute ? log(_threshold) : log(_threshold) + mode_lprob))
488 const bool marginalsNeedSorting = doMarginalsNeedSorting();
494 Lcutoff - mode_lprob +
marginals[ii]->fastGetModeLProb(),
495 marginalsNeedSorting,
499 if(!marginalResultsUnsorted[ii]->inRange(0))
506 int* tmpMarginalOrder =
new int[
dimNumber];
509 tmpMarginalOrder[ii] = ii;
511 std::sort(tmpMarginalOrder, tmpMarginalOrder +
dimNumber, comparator);
515 marginalResults[ii] = marginalResultsUnsorted[tmpMarginalOrder[ii]];
519 marginalOrder[tmpMarginalOrder[ii]] = ii;
521 delete[] tmpMarginalOrder;
525 marginalResults = marginalResultsUnsorted;
526 marginalOrder =
nullptr;
535 maxConfsLPSum[ii] = maxConfsLPSum[ii-1] + marginalResults[ii]->fastGetModeLProb();
537 lProbs_ptr = lProbs_ptr_start;
540 partialLProbs_second++;
551 lcfmsv = std::numeric_limits<double>::infinity();
560 partialLProbs[ii] = -std::numeric_limits<double>::infinity();
563 lProbs_ptr = lProbs_ptr_start + marginalResults[0]->
get_no_confs()-1;
576 std::unique_ptr<const double* []> lProbs_restarts(
new const double*[
dimNumber]);
579 lProbs_restarts[ii] = lProbs_ptr_l;
583 while(*lProbs_ptr_l < lcfmsv)
588 count += lProbs_ptr_l - lProbs_ptr_start + 1;
591 int * cntr_ptr = counter;
604 lProbs_ptr_l = lProbs_restarts[idx];
605 while(*lProbs_ptr_l < lcfmsv)
607 for(idx--; idx > 0; idx--)
608 lProbs_restarts[idx] = lProbs_ptr_l;
630 memset(counter, 0,
sizeof(
int)*
dimNumber);
634 lProbs_ptr = lProbs_ptr_start - 1;
637 IsoThresholdGenerator::~IsoThresholdGenerator()
640 delete[] maxConfsLPSum;
641 if (marginalResultsUnsorted != marginalResults)
642 delete[] marginalResultsUnsorted;
643 dealloc_table(marginalResults,
dimNumber);
644 if(marginalOrder !=
nullptr)
645 delete[] marginalOrder;
654 IsoLayeredGenerator::IsoLayeredGenerator(Iso&& iso,
int tabSize,
int hashSize,
bool reorder_marginals,
double t_prob_hint)
655 : IsoGenerator(std::move(iso))
659 currentLThreshold = nextafter(mode_lprob, -std::numeric_limits<double>::infinity());
660 lastLThreshold = (std::numeric_limits<double>::min)();
661 marginalResultsUnsorted =
new LayeredMarginal*[
dimNumber];
662 resetPositions =
new const double*[
dimNumber];
663 marginalsNeedSorting = doMarginalsNeedSorting();
665 memset(counter, 0,
sizeof(
int)*
dimNumber);
668 marginalResultsUnsorted[ii] =
new LayeredMarginal(std::move(*(
marginals[ii])), tabSize, hashSize);
672 double* marginal_priorities =
new double[
dimNumber];
676 int* tmpMarginalOrder =
new int[
dimNumber];
679 tmpMarginalOrder[ii] = ii;
681 TableOrder<double> TO(marginal_priorities);
683 std::sort(tmpMarginalOrder, tmpMarginalOrder +
dimNumber, TO);
684 marginalResults =
new LayeredMarginal*[
dimNumber];
687 marginalResults[ii] = marginalResultsUnsorted[tmpMarginalOrder[ii]];
691 marginalOrder[tmpMarginalOrder[ii]] = ii;
693 delete[] tmpMarginalOrder;
694 delete[] marginal_priorities;
698 marginalResults = marginalResultsUnsorted;
699 marginalOrder =
nullptr;
708 maxConfsLPSum[ii] = maxConfsLPSum[ii-1] + marginalResults[ii]->fastGetModeLProb();
710 lProbs_ptr = lProbs_ptr_start;
713 partialLProbs_second++;
717 lastLThreshold = 10.0;
718 IsoLayeredGenerator::nextLayer(-0.00001);
721 bool IsoLayeredGenerator::nextLayer(
double offset)
723 size_t first_mrg_size = marginalResults[0]->
get_no_confs();
728 lastLThreshold = currentLThreshold;
729 currentLThreshold += offset;
733 marginalResults[ii]->
extend(currentLThreshold - mode_lprob + marginalResults[ii]->fastGetModeLProb(), marginalsNeedSorting);
739 lProbs_ptr = lProbs_ptr_start + first_mrg_size - 1;
742 resetPositions[ii] = lProbs_ptr;
749 bool IsoLayeredGenerator::carry()
755 int * cntr_ptr = counter;
764 if(
partialLProbs[idx] + maxConfsLPSum[idx-1] >= currentLThreshold)
769 lProbs_ptr = resetPositions[idx];
771 while(*lProbs_ptr <= last_lcfmsv)
774 for(
int ii = 0; ii < idx; ii++)
775 resetPositions[ii] = lProbs_ptr;
790 partialLProbs[ii] = -std::numeric_limits<double>::infinity();
793 lProbs_ptr = lProbs_ptr_start + marginalResults[0]->
get_no_confs()-1;
796 IsoLayeredGenerator::~IsoLayeredGenerator()
799 delete[] maxConfsLPSum;
800 delete[] resetPositions;
801 if (marginalResultsUnsorted != marginalResults)
802 delete[] marginalResultsUnsorted;
803 dealloc_table(marginalResults,
dimNumber);
804 if(marginalOrder !=
nullptr)
805 delete[] marginalOrder;
814 IsoOrderedGenerator::IsoOrderedGenerator(
Iso&& iso,
int _tabSize,
int _hashSize) :
815 IsoGenerator(std::move(iso), false), allocator(dimNumber, _tabSize)
832 masses[i] = &marginalResults[i]->conf_masses();
833 logProbs[i] = &marginalResults[i]->conf_lprobs();
834 marginalConfs[i] = &marginalResults[i]->confs();
837 topConf = allocator.newConf();
839 reinterpret_cast<char*
>(topConf) +
sizeof(
double),
844 *(
reinterpret_cast<double*
>(topConf)) =
857 dealloc_table<MarginalTrek*>(marginalResults,
dimNumber);
860 delete[] marginalConfs;
876 int* topConfIsoCounts = getConf(topConf);
878 currentLProb = *(
reinterpret_cast<double*
>(topConf));
879 currentMass = combinedSum( topConfIsoCounts, masses,
dimNumber );
880 currentProb = exp(currentLProb);
885 if(marginalResults[j]->probeConfigurationIdx(topConfIsoCounts[j] + 1))
889 topConfIsoCounts[j]++;
890 *(
reinterpret_cast<double*
>(topConf)) = combinedSum(topConfIsoCounts, logProbs,
dimNumber);
892 topConfIsoCounts[j]--;
897 void* acceptedCandidate = allocator.newConf();
898 int* acceptedCandidateIsoCounts = getConf(acceptedCandidate);
899 memcpy(acceptedCandidateIsoCounts, topConfIsoCounts,
confSize);
901 acceptedCandidateIsoCounts[j]++;
903 *(
reinterpret_cast<double*
>(acceptedCandidate)) = combinedSum(acceptedCandidateIsoCounts, logProbs,
dimNumber);
905 pq.push(acceptedCandidate);
908 if(topConfIsoCounts[j] > 0)
912 topConfIsoCounts[ccount]++;
923 IsoStochasticGenerator::IsoStochasticGenerator(
Iso&& iso,
size_t no_molecules,
double _precision,
double _beta_bias) :
925 ILG(std::move(*this)),
926 to_sample_left(no_molecules),
927 precision(_precision),
928 beta_bias(_beta_bias),
The generator of isotopologues.
virtual ~IsoGenerator()
Destructor.
IsoGenerator(Iso &&iso, bool alloc_partials=true)
Move constructor.
The Iso class for the calculation of the isotopic distribution.
void saveMarginalLogSizeEstimates(double *priorities, double target_total_prob) const
Save estimates of logarithms of target sizes of marginals using Gaussian approximation into argument ...
double getHeaviestPeakMass() const
Get the mass of the heaviest peak in the isotopic distribution.
double getTheoreticalAverageMass() const
Get the theoretical average mass of the molecule.
double getUnlikeliestPeakLProb() const
Get the logprobability of the least probable subisotopologue.
void addElement(int atomCount, int noIsotopes, const double *isotopeMasses, const double *isotopeProbabilities)
Add an element to the molecule. Note: this method can only be used BEFORE Iso is used to construct an...
double getModeMass() const
Get the mass of the mode-configuration (if there are many modes, it is undefined which one will be se...
static Iso FromFASTA(const char *fasta, bool use_nominal_masses=false, bool add_water=true)
Constructor (named) from aminoacid FASTA sequence as C string.
double getLightestPeakMass() const
Get the mass of the lightest peak in the isotopic distribution.
double variance() const
Get the theoretical variance of the distribution.
double getMonoisotopicPeakMass() const
virtual ~Iso()
Destructor.
double getModeLProb() const
Get the log-probability of the mode-configuration (if there are many modes, they share this value).
void terminate_search()
Block the subsequent search of isotopologues.
ISOSPEC_FORCE_INLINE void recalc(int idx)
Recalculate the current partial log-probabilities, masses, and probabilities.
virtual ~IsoOrderedGenerator()
Destructor.
bool advanceToNextConfiguration() override final
Advance to the next, not yet visited, most probable isotopologue.
void terminate_search()
Block the subsequent search of isotopologues.
double get_mass(int idx) const
get the mass of the idx-th subisotopologue, see details in PrecalculatedMarginal::get_mass.
const double * get_lProbs_ptr() const
get the pointer to lProbs array. Accessing index -1 is legal and returns a guardian of -inf....
double get_prob(int idx) const
get the probability of the idx-th subisotopologue, see details in PrecalculatedMarginal::get_eProb.
bool extend(double new_threshold, bool do_sort=true)
Extend the set of computed subisotopologues to those above the new threshold.
unsigned int get_no_confs() const
Get the number of precomputed subisotopologues, see details in PrecalculatedMarginal::get_no_confs.
double get_lProb(int idx) const
get the log-probability of the idx-th subisotopologue, see details in PrecalculatedMarginal::get_lPro...
The marginal distribution class (a subisotopologue).
double fastGetModeLProb()
Get the log-probability of the mode subisotopologue. Results undefined if ensureModeConf() wasn't cal...
The marginal distribution class (a subisotopologue).
Precalculated Marginal class.
const double & get_lProb(int idx) const
Get the log-probability of the idx-th subisotopologue.
unsigned int get_no_confs() const
Get the number of precomputed subisotopologues.
const double * get_lProbs_ptr() const
Get the table of the log-probabilities of subisotopologues.