OpenMS 2.6.0
 
 
  
  
 
Go to the documentation of this file.
  152     static const std::array<std::string, (
Size)Unmatched::SIZE_OF_UNMATCHED> names_of_unmatched;
 
  159       SIZE_OF_MISSING_DECOY
 
  161     static const std::array<std::string, (
Size)MissingDecoy::SIZE_OF_MISSING_DECOY> names_of_missing_decoy;
 
  171     inline ExitCodes run(std::vector<FASTAFile::FASTAEntry>& proteins, std::vector<ProteinIdentification>& prot_ids, std::vector<PeptideIdentification>& pep_ids)
 
  174       return run<TFI_Vector>(protein_container, prot_ids, pep_ids);
 
  216       if (decoy_string_.empty())
 
  224           OPENMS_LOG_WARN << 
"Unable to determine decoy string automatically (not enough decoys were detected)! Using default " << (r.is_prefix ? 
"prefix" : 
"suffix") << 
" decoy string '" << r.name << 
"'\n" 
  225                           << 
"If you think that this is incorrect, please provide a decoy_string and its position manually!" << std::endl;
 
  227         prefix_ = r.is_prefix;
 
  228         decoy_string_ = r.name;
 
  230         OPENMS_LOG_INFO << 
"Using " << (prefix_ ? 
"prefix" : 
"suffix") << 
" decoy string '" << decoy_string_ << 
"'" << std::endl;
 
  237       if (!enzyme_name_.empty() && (enzyme_name_.compare(AUTO_MODE) != 0))
 
  241       else if (!prot_ids.empty() && prot_ids[0].getSearchParameters().digestion_enzyme.getName() != 
"unknown_enzyme")
 
  243         OPENMS_LOG_INFO << 
"Info: using '" << prot_ids[0].getSearchParameters().digestion_enzyme.getName() << 
"' as enzyme (obtained from idXML) for digestion." << std::endl;
 
  244         enzyme.
setEnzyme(&prot_ids[0].getSearchParameters().digestion_enzyme);
 
  248         OPENMS_LOG_WARN << 
"Warning: Enzyme name neither given nor deduceable from input. Defaulting to Trypsin!" << std::endl;
 
  252       bool xtandem_fix_parameters = 
true;
 
  253       bool msgfplus_fix_parameters = 
true;
 
  256       for (
const auto& prot_id : prot_ids)
 
  258         String search_engine = prot_id.getOriginalSearchEngineName();
 
  260         OPENMS_LOG_INFO << 
"Peptide identification engine: " << search_engine << std::endl;
 
  261         if (search_engine != 
"XTANDEM") { xtandem_fix_parameters = 
false; }
 
  262         if (!(search_engine == 
"MSGFPLUS" || search_engine == 
"MS-GF+")) { msgfplus_fix_parameters = 
false; }
 
  266       if (msgfplus_fix_parameters && enzyme.
getEnzymeName() == 
"Trypsin")
 
  268         OPENMS_LOG_WARN << 
"MSGFPlus detected but enzyme cutting rules were set to Trypsin. Correcting to Trypsin/P to copy with special cutting rule in MSGFPlus." << std::endl;
 
  274       if (!enzyme_specificity_.empty() && (enzyme_specificity_.compare(AUTO_MODE) != 0))
 
  280         enzyme.
setSpecificity(prot_ids[0].getSearchParameters().enzyme_term_specificity);
 
  285         OPENMS_LOG_WARN << 
"Warning: Enzyme specificity neither given nor present in the input file. Defaulting to 'full'!" << std::endl;
 
  293       const size_t PROTEIN_CACHE_SIZE = 4e5; 
 
  295       this->startProgress(0, 1, 
"Load first DB chunk");
 
  296       proteins.cacheChunk(PROTEIN_CACHE_SIZE);
 
  299       if (proteins.empty()) 
 
  301         OPENMS_LOG_ERROR << 
"Error: An empty database was provided. Mapping makes no sense. Aborting..." << std::endl;
 
  302         return DATABASE_EMPTY;
 
  307         OPENMS_LOG_WARN << 
"Warning: An empty set of peptide identifications was provided. Output will be empty as well." << std::endl;
 
  308         if (!keep_unreferenced_proteins_)
 
  311           for (std::vector<ProteinIdentification>::iterator it = prot_ids.begin();
 
  312             it != prot_ids.end(); ++it)
 
  314             it->getHits().clear();
 
  317         return PEPTIDE_IDS_EMPTY;
 
  322       std::vector<bool> protein_is_decoy; 
 
  323       std::vector<std::string> protein_accessions; 
 
  325       bool invalid_protein_sequence = 
false; 
 
  332         bool has_illegal_AAs(
false);
 
  334         for (std::vector<PeptideIdentification>::const_iterator it1 = pep_ids.begin(); it1 != pep_ids.end(); ++it1)
 
  337           const std::vector<PeptideHit>& hits = it1->getHits();
 
  338           for (std::vector<PeptideHit>::const_iterator it2 = hits.begin(); it2 != hits.end(); ++it2)
 
  344             String seq = it2->getSequence().toUnmodifiedString().
remove(
'*'); 
 
  347               OPENMS_LOG_ERROR << 
"Peptide sequence '" << it2->getSequence() << 
"' contains one or more ambiguous amino acids (B|J|Z|X).\n";
 
  348               has_illegal_AAs = 
true;
 
  354             appendValue(pep_DB, seq.c_str());
 
  359           OPENMS_LOG_ERROR << 
"One or more peptides contained illegal amino acids. This is not allowed!" 
  360                     << 
"\nPlease either remove the peptide or replace it with one of the unambiguous ones (while allowing for ambiguous AA's to match the protein)." << std::endl;;
 
  363         OPENMS_LOG_INFO << 
"Mapping " << length(pep_DB) << 
" peptides to " << (proteins.size() == PROTEIN_CACHE_SIZE ? 
"? (unknown number of)" : 
String(proteins.size()))  << 
" proteins." << std::endl;
 
  365         if (length(pep_DB) == 0)
 
  367           OPENMS_LOG_WARN << 
"Warning: Peptide identifications have no hits inside! Output will be empty as well." << std::endl;
 
  368           return PEPTIDE_IDS_EMPTY;
 
  374         OPENMS_LOG_INFO << 
"Searching with up to " << aaa_max_ << 
" ambiguous amino acid(s) and " << mm_max_ << 
" mismatch(es)!" << std::endl;
 
  385         uint16_t count_j_proteins(0);
 
  386         bool has_active_data = 
true; 
 
  387         const std::string jumpX(aaa_max_ + mm_max_ + 1, 
'X'); 
 
  389         this->startProgress(0, proteins.size() == PROTEIN_CACHE_SIZE ? std::numeric_limits<SignedSize>::max() : proteins.size(), 
"Aho-Corasick");
 
  390         std::atomic<int> progress_prots(0);
 
  402             #pragma omp barrier // all threads need to be here, since we are about to swap protein data 
  405               DEBUG_ONLY std::cerr << 
" activating cache ...\n";
 
  406               has_active_data = proteins.activateCache(); 
 
  407               protein_accessions.resize(proteins.getChunkOffset() + proteins.chunkSize());
 
  410             if (!has_active_data) 
break; 
 
  415               DEBUG_ONLY std::cerr << 
"Filling Protein Cache ...";
 
  416               proteins.cacheChunk(PROTEIN_CACHE_SIZE);
 
  417               protein_is_decoy.resize(proteins.getChunkOffset() + prot_count);
 
  420                 const String& seq = proteins.chunkAt(i).identifier;
 
  421                 protein_is_decoy[i + proteins.getChunkOffset()] = (prefix_ ? seq.
hasPrefix(decoy_string_) : seq.
hasSuffix(decoy_string_));
 
  425             DEBUG_ONLY std::cerr << 
" starting for loop \n";
 
  427             #pragma omp for schedule(dynamic, 100) nowait 
  431               if (omp_get_thread_num() == 0)
 
  433                 this->setProgress(progress_prots);
 
  436               prot = proteins.chunkAt(i).sequence;
 
  440               if (prot.
has(
'[') || prot.
has(
'('))
 
  442                  invalid_protein_sequence = 
true; 
 
  461               Size prot_idx = i + proteins.getChunkOffset();
 
  470                 size_t offset = -1, start = 0;
 
  471                 while ((offset = prot.find(jumpX, offset + 1)) != std::string::npos)
 
  474                   addHits_(fuzzyAC, pattern, pep_DB, prot.
substr(start, offset + jumpX.size() - start), prot, prot_idx, (
int)start, func_threads);
 
  476                   while (offset + jumpX.size() < prot.size() && prot[offset + jumpX.size()] == 
'X') ++offset;
 
  481                 if (start < prot.size())
 
  483                   addHits_(fuzzyAC, pattern, pep_DB, prot.
substr(start), prot, prot_idx, (
int)start, func_threads);
 
  488                 addHits_(fuzzyAC, pattern, pep_DB, prot, prot, prot_idx, 0, func_threads);
 
  493                 protein_accessions[prot_idx] = proteins.chunkAt(i).identifier;
 
  494                 acc_to_prot_thread[protein_accessions[prot_idx]] = prot_idx;
 
  501             #pragma omp critical(PeptideIndexer_joinAC) 
  506               func.
merge(func_threads);
 
  508               acc_to_prot.insert(acc_to_prot_thread.begin(), acc_to_prot_thread.end());
 
  509               acc_to_prot_thread.clear();
 
  515         std::cout << 
"Merge took: " << s.
toString() << 
"\n";
 
  517         std::cout << mu.
delta(
"Aho-Corasick") << 
"\n\n";
 
  523                  << 
"     ... rejected by enzyme filter: " << func.
filter_rejected << std::endl;
 
  525         if (count_j_proteins)
 
  527           OPENMS_LOG_WARN << 
"PeptideIndexer found " << count_j_proteins << 
" protein sequences in your database containing the amino acid 'J'." 
  528             << 
"To match 'J' in a protein, an ambiguous amino acid placeholder for I/L will be used.\n" 
  529             << 
"This costs runtime and eats into the 'aaa_max' limit, leaving less opportunity for B/Z/X matches.\n" 
  530             << 
"If you want 'J' to be treated as unambiguous, enable '-IL_equivalent'!" << std::endl;
 
  540       for (
Size run_idx = 0; run_idx < prot_ids.size(); ++run_idx)
 
  542         runid_to_runidx[prot_ids[run_idx].getIdentifier()] = run_idx;
 
  546       Size stats_matched_unique(0);
 
  547       Size stats_matched_multi(0);
 
  548       Size stats_unmatched(0);    
 
  549       Size stats_count_m_t(0);    
 
  550       Size stats_count_m_d(0);    
 
  551       Size stats_count_m_td(0);   
 
  556       for (std::vector<PeptideIdentification>::iterator it1 = pep_ids.begin(); it1 != pep_ids.end(); ++it1)
 
  559         Size run_idx = runid_to_runidx[it1->getIdentifier()];
 
  561         std::vector<PeptideHit>& hits = it1->getHits();
 
  563         for (std::vector<PeptideHit>::iterator it_hit = hits.begin(); it_hit != hits.end(); )
 
  566           it_hit->setPeptideEvidences(std::vector<PeptideEvidence>());
 
  571           bool matches_target(
false);
 
  572           bool matches_decoy(
false);
 
  574           std::set<Size> prot_indices; 
 
  576           for (std::set<PeptideProteinMatchInformation>::const_iterator it_i = func.
pep_to_prot[pep_idx].begin();
 
  579             prot_indices.insert(it_i->protein_index);
 
  580             const String& accession = protein_accessions[it_i->protein_index];
 
  581             PeptideEvidence pe(accession, it_i->position, it_i->position + (
int)it_hit->getSequence().size() - 1, it_i->AABefore, it_i->AAAfter);
 
  582             it_hit->addPeptideEvidence(pe);
 
  584             runidx_to_protidx[run_idx].insert(it_i->protein_index); 
 
  586             if (protein_is_decoy[it_i->protein_index])
 
  588               matches_decoy = 
true;
 
  592               matches_target = 
true;
 
  597           if (matches_decoy && matches_target)
 
  599             it_hit->setMetaValue(
"target_decoy", 
"target+decoy");
 
  602           else if (matches_target)
 
  604             it_hit->setMetaValue(
"target_decoy", 
"target");
 
  607           else if (matches_decoy)
 
  609             it_hit->setMetaValue(
"target_decoy", 
"decoy");
 
  614           if (prot_indices.size() == 1)
 
  616             it_hit->setMetaValue(
"protein_references", 
"unique");
 
  617             ++stats_matched_unique;
 
  619           else if (prot_indices.size() > 1)
 
  621             it_hit->setMetaValue(
"protein_references", 
"non-unique");
 
  622             ++stats_matched_multi;
 
  627             if (stats_unmatched < 15) 
OPENMS_LOG_INFO << 
"Unmatched peptide: " << it_hit->getSequence() << 
"\n";
 
  628             else if (stats_unmatched == 15) 
OPENMS_LOG_INFO << 
"Unmatched peptide: ...\n";
 
  629             if (unmatched_action_ == Unmatched::REMOVE)
 
  631               it_hit = hits.erase(it_hit);
 
  636               it_hit->setMetaValue(
"protein_references", 
"unmatched");
 
  645       Size total_peptides = stats_count_m_t + stats_count_m_d + stats_count_m_td + stats_unmatched;
 
  649       OPENMS_LOG_INFO << 
"  unmatched                : " << stats_unmatched << 
" (" << stats_unmatched * 100 / total_peptides << 
" %)\n";
 
  651       OPENMS_LOG_INFO << 
"    match to target DB only: " << stats_count_m_t << 
" (" << stats_count_m_t * 100 / total_peptides << 
" %)\n";
 
  652       OPENMS_LOG_INFO << 
"    match to decoy DB only : " << stats_count_m_d << 
" (" << stats_count_m_d * 100 / total_peptides << 
" %)\n";
 
  653       OPENMS_LOG_INFO << 
"    match to both          : " << stats_count_m_td << 
" (" << stats_count_m_td * 100 / total_peptides << 
" %)\n";
 
  656       OPENMS_LOG_INFO << 
"    no match (to 0 protein)         : " << stats_unmatched << 
"\n";
 
  657       OPENMS_LOG_INFO << 
"    unique match (to 1 protein)     : " << stats_matched_unique << 
"\n";
 
  658       OPENMS_LOG_INFO << 
"    non-unique match (to >1 protein): " << stats_matched_multi << std::endl;
 
  661       Size stats_matched_proteins(0), stats_matched_new_proteins(0), stats_orphaned_proteins(0), stats_proteins_target(0), stats_proteins_decoy(0);
 
  664       for (
Size run_idx = 0; run_idx < prot_ids.size(); ++run_idx)
 
  666         std::set<Size> masterset = runidx_to_protidx[run_idx]; 
 
  668         std::vector<ProteinHit>& phits = prot_ids[run_idx].getHits();
 
  671           std::vector<ProteinHit> orphaned_hits;
 
  672           for (std::vector<ProteinHit>::iterator p_hit = phits.begin(); p_hit != phits.end(); ++p_hit)
 
  674             const String& acc = p_hit->getAccession();
 
  675             if (!acc_to_prot.
has(acc)) 
 
  677               ++stats_orphaned_proteins;
 
  678               if (keep_unreferenced_proteins_)
 
  680                 p_hit->setMetaValue(
"target_decoy", 
"");
 
  681                 orphaned_hits.push_back(*p_hit);
 
  686           phits = orphaned_hits;
 
  691         phits.reserve(phits.size() + masterset.size());
 
  692         for (std::set<Size>::const_iterator it = masterset.begin(); it != masterset.end(); ++it)
 
  697           if (write_protein_sequence_ || write_protein_description_)
 
  699             proteins.readAt(fe, *it);
 
  700             if (write_protein_sequence_)
 
  704             if (write_protein_description_)
 
  709           if (protein_is_decoy[*it])
 
  712             ++stats_proteins_decoy;
 
  717             ++stats_proteins_target;
 
  719           phits.push_back(hit);
 
  720           ++stats_matched_new_proteins;
 
  722         stats_matched_proteins += phits.size();
 
  729       OPENMS_LOG_INFO << 
"  total proteins searched: " << proteins.size() << 
"\n";
 
  730       OPENMS_LOG_INFO << 
"  matched proteins       : " << stats_matched_proteins << 
" (" << stats_matched_new_proteins << 
" new)\n";
 
  731       if (stats_matched_proteins)
 
  733         OPENMS_LOG_INFO << 
"  matched target proteins: " << stats_proteins_target << 
" (" << stats_proteins_target * 100 / stats_matched_proteins << 
" %)\n";
 
  734         OPENMS_LOG_INFO << 
"  matched decoy proteins : " << stats_proteins_decoy << 
" (" << stats_proteins_decoy * 100 / stats_matched_proteins << 
" %)\n";
 
  736       OPENMS_LOG_INFO << 
"  orphaned proteins      : " << stats_orphaned_proteins << (keep_unreferenced_proteins_ ? 
" (all kept)" : 
" (all removed)\n");
 
  741       bool has_error = 
false;
 
  743       if (invalid_protein_sequence)
 
  745         OPENMS_LOG_ERROR << 
"Error: One or more protein sequences contained the characters '[' or '(', which are illegal in protein sequences." 
  746                  << 
"\nPeptide hits might be masked by these characters (which usually indicate presence of modifications).\n";
 
  750       if ((stats_count_m_d + stats_count_m_td) == 0)
 
  752         String msg(
"No peptides were matched to the decoy portion of the database! Did you provide the correct concatenated database? Are your 'decoy_string' (=" + 
String(decoy_string_) + 
") and 'decoy_string_position' (=" + 
String(param_.getValue(
"decoy_string_position")) + 
") settings correct?");
 
  753         if (missing_decoy_action_ == MissingDecoy::IS_ERROR)
 
  755           OPENMS_LOG_ERROR << 
"Error: " << msg << 
"\nSet 'missing_decoy_action' to 'warn' if you are sure this is ok!\nAborting ..." << std::endl;
 
  758         else if (missing_decoy_action_ == MissingDecoy::WARN)
 
  760           OPENMS_LOG_WARN << 
"Warn: " << msg << 
"\nSet 'missing_decoy_action' to 'error' if you want to elevate this to an error!" << std::endl;
 
  767       if (stats_unmatched > 0)
 
  769         OPENMS_LOG_ERROR << 
"PeptideIndexer found unmatched peptides, which could not be associated to a protein.\n";
 
  770         if (unmatched_action_ == Unmatched::IS_ERROR)
 
  773             << 
"Potential solutions:\n" 
  774             << 
"   - check your FASTA database is identical to the search DB (or use 'auto')\n" 
  775             << 
"   - set 'enzyme:specificity' and 'enzyme:name' to 'auto' to match the parameters of the search engine\n" 
  776             << 
"   - increase 'aaa_max' to allow more ambiguous amino acids\n" 
  777             << 
"   - as a last resort: use the 'unmatched_action' option to accept or even remove unmatched peptides\n" 
  778             << 
"     (note that unmatched peptides cannot be used for FDR calculation or quantification)\n";
 
  781         else if (unmatched_action_ == Unmatched::WARN)
 
  783           OPENMS_LOG_ERROR << 
"  Warning: " << stats_unmatched << 
" unmatched hits have been found, but were not removed!\n" 
  784             << 
"These are not annotated with target/decoy information and might lead to issues with downstream tools (such as FDR).\n" 
  785             << 
"Switch to '" << names_of_unmatched[(
Size)Unmatched::REMOVE] << 
"' if you want to avoid these problems.\n";
 
  787         else if (unmatched_action_ == Unmatched::REMOVE)
 
  789           OPENMS_LOG_ERROR << 
"  Warning: " << stats_unmatched <<
" unmatched hits have been removed!\n" 
  790                            << 
"Make sure that these hits are actually a violation of the cutting rules by inspecting the database!\n";
 
  791           if (xtandem_fix_parameters) 
OPENMS_LOG_ERROR << 
"Since the results are from X!Tandem, this is probably ok (check anyways).\n";
 
  802         OPENMS_LOG_ERROR << 
"Result files will be written, but PeptideIndexer will exit with an error code." << std::endl;
 
  803         return UNEXPECTED_RESULT;
 
  808      const String& getDecoyString() 
const;
 
  810      bool isPrefix() 
const;
 
  821       const std::tuple<const Size&, const Int&, const char&, const char&> 
tie()
 const 
  823         return std::tie(protein_index, 
position, AABefore, AAAfter);
 
  827         return tie() < other.
tie();
 
  831         return tie() == other.
tie();
 
  838       typedef std::map<OpenMS::Size, std::set<PeptideProteinMatchInformation> > 
MapType;
 
  849         pep_to_prot(), filter_passed(0), filter_rejected(0), enzyme_(enzyme), xtandem_(xtandem)
 
  855         if (pep_to_prot.empty())
 
  861           for (FoundProteinFunctor::MapType::const_iterator it = other.
pep_to_prot.begin(); it != other.
pep_to_prot.end(); ++it)
 
  863             this->pep_to_prot[it->first].insert(other.
pep_to_prot[it->first].begin(), other.
pep_to_prot[it->first].end());
 
  888             (
position + len_pep >= seq_prot.size()) ?
 
  892           pep_to_prot[idx_pep].insert(match);
 
  910         const seqan::Peptide& tmp_pep = pep_DB[fuzzyAC.
getHitDBIndex()];
 
  915     void updateMembers_() 
override;
 
  918     bool prefix_{ 
false };
 
  923     bool write_protein_sequence_{ 
false };
 
  924     bool write_protein_description_{ 
false };
 
  925     bool keep_unreferenced_proteins_{ 
false };
 
  927     bool IL_equivalent_{ 
false };
 
  
MissingDecoy
Definition: PeptideIndexing.h:154
Base class for TOPP applications.
Definition: TOPPBase.h:144
static const std::string NamesOfSpecificity[SIZE_OF_SPECIFICITY]
Names of the Specificity.
Definition: EnzymaticDigestion.h:77
void addHits_(AhoCorasickAmbiguous &fuzzyAC, const AhoCorasickAmbiguous::FuzzyACPattern &pattern, const AhoCorasickAmbiguous::PeptideDB &pep_DB, const String &prot, const String &full_prot, SignedSize idx_prot, Int offset, FoundProteinFunctor &func_threads) const
Definition: PeptideIndexing.h:905
Definition: PeptideIndexing.h:139
A convenience class to report either absolute or delta (between two timepoints) RAM usage.
Definition: SysInfo.h:83
void setDescription(const String &description)
sets the description of the protein
Definition: EnzymaticDigestion.h:71
void store(const String &filename, const std::vector< ProteinIdentification > &protein_ids, const std::vector< PeptideIdentification > &peptide_ids, const String &document_id="")
Stores the data in an idXML file.
void setSequence(const String &sequence)
sets the protein sequence
void setEnzyme(const String &name)
Sets the enzyme for the digestion (by name)
OpenMS::Size filter_passed
Definition: PeptideIndexing.h:840
bool findNext(const FuzzyACPattern &pattern)
Enumerate hits.
Definition: AhoCorasickAmbiguous.h:1037
void setProtein(const String &protein_sequence)
Reset to new protein sequence. All previous data is forgotten.
Definition: AhoCorasickAmbiguous.h:1024
ProteaseDigestion enzyme_
Definition: PeptideIndexing.h:844
String & substitute(char from, char to)
Replaces all occurrences of the character from by the character to.
bool has(const Key &key) const
Test whether the map contains the given key.
Definition: Map.h:108
String sequence
Definition: FASTAFile.h:80
A more convenient string class.
Definition: String.h:59
void addHit(const OpenMS::Size idx_pep, const OpenMS::Size idx_prot, const OpenMS::Size len_pep, const OpenMS::String &seq_prot, OpenMS::Int position)
Definition: PeptideIndexing.h:874
Unmatched
Action to take when peptide hits could not be matched.
Definition: PeptideIndexing.h:145
String description
Definition: FASTAFile.h:79
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:127
Representation of a protein hit.
Definition: ProteinHit.h:58
Int getHitProteinPosition()
Offset into protein sequence where hit was found.
Definition: AhoCorasickAmbiguous.h:1057
String< AAcid, Alloc< void > > AAString
Definition: AhoCorasickAmbiguous.h:206
Extended Aho-Corasick algorithm capable of matching ambiguous amino acids in the pattern (i....
Definition: AhoCorasickAmbiguous.h:970
const DataValue & getValue(const String &key) const
Returns a value of a parameter.
ExitCodes run(std::vector< FASTAFile::FASTAEntry > &proteins, std::vector< ProteinIdentification > &prot_ids, std::vector< PeptideIdentification > &pep_ids)
forward for old interface and pyOpenMS; use run<T>() for more control
Definition: PeptideIndexing.h:171
String delta(const String &event="delta")
#define DEBUG_ONLY
Definition: AhoCorasickAmbiguous.h:46
static void initPattern(const PeptideDB &pep_db, const int aaa_max, const int mm_max, FuzzyACPattern &pattern)
Construct a trie from a set of peptide sequences (which are to be found in a protein).
Definition: AhoCorasickAmbiguous.h:991
#define OPENMS_LOG_WARN
Macro to be used if a warning, i.e. a piece of information which should be read by the user, should be logged.
Definition: LogStream.h:460
static Specificity getSpecificityByName(const String &name)
OpenMS::Size filter_rejected
Definition: PeptideIndexing.h:841
FASTA entry type (identifier, description and sequence)
Definition: FASTAFile.h:76
Definition: PeptideIndexing.h:141
bool hasPrefix(const String &string) const
true if String begins with string, false otherwise
MapType pep_to_prot
Definition: PeptideIndexing.h:839
fully enzyme specific, e.g., tryptic (ends with KR, AA-before is KR), or peptide is at protein termin...
Definition: EnzymaticDigestion.h:70
A base class for all classes handling default parameters.
Definition: DefaultParamHandler.h:92
void load(const String &filename, std::vector< ProteinIdentification > &protein_ids, std::vector< PeptideIdentification > &peptide_ids)
Loads the identifications of an idXML file without identifier.
Exception indicating that an invalid parameter was handed over to an algorithm.
Definition: Exception.h:347
void setAccession(const String &accession)
sets the accession of the protein
static const char N_TERMINAL_AA
Definition: PeptideEvidence.h:60
ExitCodes run(FASTAContainer< T > &proteins, std::vector< ProteinIdentification > &prot_ids, std::vector< PeptideIdentification > &pep_ids)
Re-index peptide identifications honoring enzyme cutting rules, ambiguous amino acids and target/deco...
Definition: PeptideIndexing.h:213
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:46
bool toBool() const
Conversion to bool.
Size getHitDBIndex()
Get index of hit into peptide database of the pattern.
Definition: AhoCorasickAmbiguous.h:1047
void merge(FoundProteinFunctor &other)
Definition: PeptideIndexing.h:853
FoundProteinFunctor(const ProteaseDigestion &enzyme, bool xtandem)
Definition: PeptideIndexing.h:848
Base class for all classes that want to report their progress.
Definition: ProgressLogger.h:54
FASTAContainer<TFI_Vector> simply takes an existing vector of FASTAEntries and provides the same inte...
Definition: FASTAContainer.h:243
double getClockTime() const
void reset()
Clear the stop watch but keep running.
bool has(Byte byte) const
true if String contains the byte, false otherwise
Representation of a peptide evidence.
Definition: PeptideEvidence.h:50
void start()
Start the stop watch.
::seqan::Pattern< PeptideDB, ::seqan::FuzzyAC > FuzzyACPattern
Definition: AhoCorasickAmbiguous.h:974
void stop()
Stop the stop watch (can be resumed later). If the stop watch was not running an exception is thrown.
void setParameters(const Param &param)
Sets the parameters.
bool hasSuffix(const String &string) const
true if String ends with string, false otherwise
const Param & getParameters() const
Non-mutable access to the parameters.
Not implemented exception.
Definition: Exception.h:436
::seqan::StringSet<::seqan::AAString > PeptideDB
Definition: AhoCorasickAmbiguous.h:973
String toString() const
get a compact representation of the current time status.
ExitCodes
Exit codes.
Definition: PeptideIndexing.h:135
void after()
record data for the second timepoint
This class is used to determine the current process' CPU (user and/or kernel) and wall time.
Definition: StopWatch.h:65
bool isValidProduct(const String &protein, int pep_pos, int pep_length, bool ignore_missed_cleavages=true, bool allow_nterm_protein_cleavage=false, bool allow_random_asp_pro_cleavage=false) const
Variant of EnzymaticDigestion::isValidProduct() with support for n-term protein cleavage and random D...
int main(int argc, const char **argv)
Definition: INIFileEditor.cpp:73
std::map< OpenMS::Size, std::set< PeptideProteinMatchInformation > > MapType
Definition: PeptideIndexing.h:838
bool update(const Param &p_outdated, const bool add_unknown=false)
Rescue parameter values from p_outdated to current param.
ptrdiff_t SignedSize
Signed Size type e.g. used as pointer difference.
Definition: Types.h:134
String getEnzymeName() const
Returns the enzyme for the digestion.
static char const  *const AUTO_MODE
name of enzyme/specificity which signals that the enzyme/specificity should be taken from meta inform...
Definition: PeptideIndexing.h:132
#define OPENMS_LOG_ERROR
Macro to be used if non-fatal errors are reported (processing continues)
Definition: LogStream.h:455
Logger::LogStream OpenMS_Log_debug
Global static instance of a LogStream to capture messages classified as debug output....
String substr(size_t pos=0, size_t n=npos) const
Wrapper for the STL substr() method. Returns a String object with its contents initialized to a subst...
Class for the enzymatic digestion of proteins.
Definition: ProteaseDigestion.h:60
static String findDatabase(const String &db_name)
Definition: PeptideIndexing.h:138
Refreshes the protein references for all peptide hits in a vector of PeptideIdentifications and adds ...
Definition: PeptideIndexing.h:126
static bool readable(const String &file)
Return true if the file exists and is readable.
static const char C_TERMINAL_AA
Definition: PeptideEvidence.h:61
bool xtandem_
Definition: PeptideIndexing.h:845
Management and storage of parameters / INI files.
Definition: Param.h:73
Definition: PeptideIndexing.h:835
String & remove(char what)
Remove all occurrences of the character what.
bool isAmbiguous(AAcid c)
Definition: AhoCorasickAmbiguous.h:578
Map class based on the STL map (containing several convenience functions)
Definition: Map.h:50
#define OPENMS_LOG_INFO
Macro to be used if information, e.g. a status, should be reported.
Definition: LogStream.h:465
void setSpecificity(Specificity spec)
Sets the specificity for the digestion (default is SPEC_FULL).
static String & toUpper(String &this_s)
Definition: StringUtils.h:874
Definition: PeptideIndexing.h:137
Definition: PeptideIndexing.h:140
Size< TNeedle >::Type position(const PatternAuxData< TNeedle > &dh)
Definition: AhoCorasickAmbiguous.h:561
static Result findDecoyString(FASTAContainer< T > &proteins)
Heuristic to determine the decoy string given a set of protein names.
Definition: FASTAContainer.h:359
void setLogType(LogType type) const
Sets the progress log that should be used. The default type is NONE!
template parameter for vector-based FASTA access
Definition: FASTAContainer.h:82
Used to load and store idXML files.
Definition: IdXMLFile.h:63
FASTAContainer<TFI_File> will make FASTA entries available chunk-wise from start to end by loading it...
Definition: FASTAContainer.h:93