OpenMS
Loading...
Searching...
No Matches
ProteinIdentificationArrowIO.h
Go to the documentation of this file.
1// Copyright (c) 2002-present, OpenMS Inc. -- EKU Tuebingen, ETH Zurich, and FU Berlin
2// SPDX-License-Identifier: BSD-3-Clause
3//
4// --------------------------------------------------------------------------
5// $Maintainer: Timo Sachsenberg $
6// $Authors: Timo Sachsenberg $
7// --------------------------------------------------------------------------
8
9#pragma once
10
11#include <OpenMS/config.h>
12
17
18#include <map>
19#include <memory>
20#include <vector>
21
22// Forward declarations
23namespace arrow
24{
25 class Table;
26}
27
28namespace OpenMS
29{
30
43{
44public:
45 // ==================== Export methods ====================
46
55 static std::shared_ptr<arrow::Table> exportProteinsToArrow(
56 const std::vector<ProteinIdentification>& protein_identifications);
57
67 const std::vector<ProteinIdentification>& protein_identifications,
68 const std::string& filename,
69 const ParquetWriteConfig& config = ParquetWriteConfig{});
70
79 static std::shared_ptr<arrow::Table> exportProteinGroupsToArrow(
80 const std::vector<ProteinIdentification>& protein_identifications);
81
91 const std::vector<ProteinIdentification>& protein_identifications,
92 const std::string& filename,
93 const ParquetWriteConfig& config = ParquetWriteConfig{});
94
103 static std::shared_ptr<arrow::Table> exportSearchParamsToArrow(
104 const std::vector<ProteinIdentification>& protein_identifications);
105
115 const std::vector<ProteinIdentification>& protein_identifications,
116 const std::string& filename,
117 const ParquetWriteConfig& config = ParquetWriteConfig{});
118
119 // ==================== Import methods ====================
120
133 static bool importFromParquet(
134 const std::string& proteins_filename,
135 const std::string& protein_groups_filename,
136 const std::string& search_params_filename,
137 std::vector<ProteinIdentification>& protein_identifications);
138
150 const std::shared_ptr<arrow::Table>& table,
151 std::vector<ProteinIdentification>& protein_identifications);
152
164 const std::shared_ptr<arrow::Table>& table,
165 std::vector<ProteinIdentification>& protein_identifications);
166
178 const std::shared_ptr<arrow::Table>& table,
179 std::vector<ProteinIdentification>& protein_identifications);
180
189 const std::string& filename,
190 std::vector<ProteinIdentification>& protein_identifications);
191
200 const std::string& filename,
201 std::vector<ProteinIdentification>& protein_identifications);
202
211 const std::string& filename,
212 std::vector<ProteinIdentification>& protein_identifications);
213
214 // ==================== Identifier handling parity with XML lane ====================
215
240 static std::map<std::string, std::string> synthesizeRunIdentifiers(
241 std::vector<ProteinIdentification>& protein_identifications);
242
253 static void applyRunIdentifierRename(
254 const std::map<std::string, std::string>& rename,
255 PeptideIdentificationList& pep_ids);
256
268 static void checkUniqueIdentifiers(
269 const std::vector<ProteinIdentification>& protein_identifications);
270};
271
272} // namespace OpenMS
Container for peptide identifications from multiple spectra.
Definition PeptideIdentificationList.h:66
Import and export ProteinIdentification data to/from Apache Arrow format.
Definition ProteinIdentificationArrowIO.h:43
static void checkUniqueIdentifiers(const std::vector< ProteinIdentification > &protein_identifications)
Reject a ProteinIdentification vector with duplicate identifiers (store-side check).
static bool exportProteinGroupsToParquet(const std::vector< ProteinIdentification > &protein_identifications, const std::string &filename, const ParquetWriteConfig &config=ParquetWriteConfig{})
Export protein groups to Parquet file.
static bool importProteinGroupsFromArrow(const std::shared_ptr< arrow::Table > &table, std::vector< ProteinIdentification > &protein_identifications)
Import protein groups from Arrow Table.
static bool exportSearchParamsToParquet(const std::vector< ProteinIdentification > &protein_identifications, const std::string &filename, const ParquetWriteConfig &config=ParquetWriteConfig{})
Export search parameters to Parquet file.
static bool exportProteinsToParquet(const std::vector< ProteinIdentification > &protein_identifications, const std::string &filename, const ParquetWriteConfig &config=ParquetWriteConfig{})
Export protein hits to Parquet file.
static bool importFromParquet(const std::string &proteins_filename, const std::string &protein_groups_filename, const std::string &search_params_filename, std::vector< ProteinIdentification > &protein_identifications)
Import all three Parquet files and reconstruct ProteinIdentifications.
static std::shared_ptr< arrow::Table > exportProteinGroupsToArrow(const std::vector< ProteinIdentification > &protein_identifications)
Export protein groups to Apache Arrow Table.
static bool importProteinsFromParquet(const std::string &filename, std::vector< ProteinIdentification > &protein_identifications)
Import protein hits from Parquet file.
static bool importProteinGroupsFromParquet(const std::string &filename, std::vector< ProteinIdentification > &protein_identifications)
Import protein groups from Parquet file.
static bool importProteinsFromArrow(const std::shared_ptr< arrow::Table > &table, std::vector< ProteinIdentification > &protein_identifications)
Import protein hits from Arrow Table.
static std::shared_ptr< arrow::Table > exportSearchParamsToArrow(const std::vector< ProteinIdentification > &protein_identifications)
Export search parameters to Apache Arrow Table.
static bool importSearchParamsFromArrow(const std::shared_ptr< arrow::Table > &table, std::vector< ProteinIdentification > &protein_identifications)
Import search parameters from Arrow Table.
static std::shared_ptr< arrow::Table > exportProteinsToArrow(const std::vector< ProteinIdentification > &protein_identifications)
Export protein hits to Apache Arrow Table.
static void applyRunIdentifierRename(const std::map< std::string, std::string > &rename, PeptideIdentificationList &pep_ids)
Apply a stored->synthesized identifier rename to a PeptideIdentification collection.
static std::map< std::string, std::string > synthesizeRunIdentifiers(std::vector< ProteinIdentification > &protein_identifications)
Synthesize fresh run identifiers per ProteinIdentification, mirroring IdXMLFile.
static bool importSearchParamsFromParquet(const std::string &filename, std::vector< ProteinIdentification > &protein_identifications)
Import search parameters from Parquet file.
Main OpenMS namespace.
Definition openswathalgo/include/OpenMS/OPENSWATHALGO/DATAACCESS/ISpectrumAccess.h:19
Definition ArrowIOHelpers.h:22
Configuration for Parquet file writing.
Definition MSExperimentArrowExport.h:136