|
| static std::shared_ptr< arrow::Table > | exportProteinsToArrow (const std::vector< ProteinIdentification > &protein_identifications) |
| | Export protein hits to Apache Arrow Table.
|
| |
| static bool | exportProteinsToParquet (const std::vector< ProteinIdentification > &protein_identifications, const std::string &filename, const ParquetWriteConfig &config=ParquetWriteConfig{}) |
| | Export protein hits to Parquet file.
|
| |
| static std::shared_ptr< arrow::Table > | exportProteinGroupsToArrow (const std::vector< ProteinIdentification > &protein_identifications) |
| | Export protein groups to Apache Arrow Table.
|
| |
| static bool | exportProteinGroupsToParquet (const std::vector< ProteinIdentification > &protein_identifications, const std::string &filename, const ParquetWriteConfig &config=ParquetWriteConfig{}) |
| | Export protein groups to Parquet file.
|
| |
| static std::shared_ptr< arrow::Table > | exportSearchParamsToArrow (const std::vector< ProteinIdentification > &protein_identifications) |
| | Export search parameters to Apache Arrow Table.
|
| |
| static bool | exportSearchParamsToParquet (const std::vector< ProteinIdentification > &protein_identifications, const std::string &filename, const ParquetWriteConfig &config=ParquetWriteConfig{}) |
| | Export search parameters to Parquet file.
|
| |
| static bool | importFromParquet (const std::string &proteins_filename, const std::string &protein_groups_filename, const std::string &search_params_filename, std::vector< ProteinIdentification > &protein_identifications) |
| | Import all three Parquet files and reconstruct ProteinIdentifications.
|
| |
| static bool | importSearchParamsFromArrow (const std::shared_ptr< arrow::Table > &table, std::vector< ProteinIdentification > &protein_identifications) |
| | Import search parameters from Arrow Table.
|
| |
| static bool | importProteinsFromArrow (const std::shared_ptr< arrow::Table > &table, std::vector< ProteinIdentification > &protein_identifications) |
| | Import protein hits from Arrow Table.
|
| |
| static bool | importProteinGroupsFromArrow (const std::shared_ptr< arrow::Table > &table, std::vector< ProteinIdentification > &protein_identifications) |
| | Import protein groups from Arrow Table.
|
| |
| static bool | importSearchParamsFromParquet (const std::string &filename, std::vector< ProteinIdentification > &protein_identifications) |
| | Import search parameters from Parquet file.
|
| |
| static bool | importProteinsFromParquet (const std::string &filename, std::vector< ProteinIdentification > &protein_identifications) |
| | Import protein hits from Parquet file.
|
| |
| static bool | importProteinGroupsFromParquet (const std::string &filename, std::vector< ProteinIdentification > &protein_identifications) |
| | Import protein groups from Parquet file.
|
| |
| static std::map< std::string, std::string > | synthesizeRunIdentifiers (std::vector< ProteinIdentification > &protein_identifications) |
| | Synthesize fresh run identifiers per ProteinIdentification, mirroring IdXMLFile.
|
| |
| static void | applyRunIdentifierRename (const std::map< std::string, std::string > &rename, PeptideIdentificationList &pep_ids) |
| | Apply a stored->synthesized identifier rename to a PeptideIdentification collection.
|
| |
| static void | checkUniqueIdentifiers (const std::vector< ProteinIdentification > &protein_identifications) |
| | Reject a ProteinIdentification vector with duplicate identifiers (store-side check).
|
| |
Import and export ProteinIdentification data to/from Apache Arrow format.
This class provides static methods to export and import ProteinIdentification data to/from Apache Arrow Tables and Parquet files. Separate tables are provided for protein hits, protein groups, and search parameters.
- Experimental classes:
- This API is experimental and may change in future versions.
static std::map< std::string, std::string > synthesizeRunIdentifiers (std::vector< ProteinIdentification > &protein_identifications) [static]
Synthesize fresh run identifiers per ProteinIdentification, mirroring IdXMLFile.
This mirrors the behavior at IdXMLFile.cpp:530: on every load, each ProteinIdentification is assigned a fresh identifier of the form <search_engine>_<date>_<UniqueIdGenerator>. The identifier stored on disk is informational only; the in-memory identifier is always regenerated. FeatureXMLHandler, ConsensusXMLHandler, and IdXMLFile apply the same defense on load to guard against identifier collisions that surface downstream in rip-and-merge scenarios.
The function mutates protein_identifications in place and returns the map { stored_id -> synthesized_id } so the caller can apply the same rename to each PeptideIdentification collection it owns (FeatureMap has 2: per-feature and unassigned; ConsensusMap has 2; PSMArrowIO has 1).
Edge cases:
- empty getSearchEngine() falls back to literal "unknown"
- invalid getDateTime() uses placeholder "1900-01-01T00:00:00"
- multiple ProtIDs sharing one stored identifier each receive their own distinct synthesized identifier; because the returned map holds only one entry per stored identifier, it retains only the last-seen mapping. An OPENMS_LOG_WARN is emitted once per such collision.
- Parameters
  - [in,out] protein_identifications: ProtID vector whose identifiers will be replaced
- Returns
  - Map from each stored identifier to its synthesized replacement.