OpenMS
Loading...
Searching...
No Matches
MSExperimentArrowExport.h
Go to the documentation of this file.
1// Copyright (c) 2002-present, OpenMS Inc. -- EKU Tuebingen, ETH Zurich, and FU Berlin
2// SPDX-License-Identifier: BSD-3-Clause
3//
4// --------------------------------------------------------------------------
5// $Maintainer: Timo Sachsenberg $
6// $Authors: Timo Sachsenberg $
7// --------------------------------------------------------------------------
8
9#pragma once
10
11#include <OpenMS/config.h>
12
15
16#include <cstdint>
17#include <vector>
18#include <string>
19
20// Forward declarations for Arrow C Data Interface structs (opaque pointers only)
21// Full definitions are in <arrow/c/abi.h>, included only in MSExperimentArrowExport.cpp
22struct ArrowSchema;
23struct ArrowArray;
24
25namespace OpenMS
26{
27
/// Format for Arrow export.
///
/// NOTE(review): the declaration head and the SemiWide enumerator were missing
/// from this block and have been restored from the generated API index;
/// confirm the exact declaration (e.g. any explicit underlying type) against
/// the repository header.
enum class ArrowExportFormat
{
  Long,     ///< One row per peak (default)
  SemiWide  ///< One row per spectrum with list arrays for mz/intensity
};
48
/// Configuration for Arrow export of spectra data.
///
/// Plain aggregate of export options; every member carries a default, so a
/// default-constructed instance is usable as-is.
60struct OPENMS_DLLAPI ArrowSpectraExportConfig
61{
   /// Output layout. Defaults to ArrowExportFormat::Long (one row per peak).
63 ArrowExportFormat format = ArrowExportFormat::Long;
64
   /// MS levels to include (empty = all levels).
66 std::vector<UInt> ms_levels;
67
   /// Lower retention-time value, default 0.
   /// NOTE(review): presumably an RT filter bound; the units and whether 0
   /// means "no limit" are not visible here -- confirm.
69 double min_rt = 0;
70
   /// Upper retention-time value, default 0.
   /// NOTE(review): presumably the counterpart of min_rt -- confirm semantics.
72 double max_rt = 0;
73
   /// Lower m/z value, default 0.
   /// NOTE(review): presumably an m/z filter bound; meaning of 0 not visible
   /// here -- confirm.
75 double min_mz = 0;
76
   /// Upper m/z value, default 0.
   /// NOTE(review): presumably the counterpart of min_mz -- confirm semantics.
78 double max_mz = 0;
79
   /// List of column names, empty by default.
   /// NOTE(review): presumably selects which columns are exported (compare
   /// getSpectraArrowColumnNames); how an empty list is treated is not
   /// visible here -- confirm.
86 std::vector<std::string> columns;
87
   /// Defaults to true.
   /// NOTE(review): presumably toggles precursor-related columns -- confirm.
89 bool include_precursor_info = true;
90
   /// Defaults to true.
   /// NOTE(review): presumably toggles ion-mobility columns -- confirm.
92 bool include_ion_mobility = true;
93};
94
95
105{
107 ArrowExportFormat format = ArrowExportFormat::Long;
108
110 double min_rt = 0;
111
113 double max_rt = 0;
114
116 std::vector<std::string> columns;
117};
118
119
/// Configuration for Parquet file writing.
///
/// Plain aggregate of writer options; every member carries a default.
135struct OPENMS_DLLAPI ParquetWriteConfig
136{
    /// Compression algorithm.
138 enum class Compression
139 {
140 NONE,
141 SNAPPY,
142 GZIP,
143 LZ4,
144 ZSTD
145 };
146
    /// Selected compression algorithm; defaults to Compression::ZSTD.
148 Compression compression = Compression::ZSTD;
149
    /// Compression level, default 3.
    /// NOTE(review): the valid range and which codecs honour this value are
    /// not visible here -- confirm.
154 int compression_level = 3;
155
    /// Row group size, default 128 * 1024 * 1024 (= 134217728).
    /// NOTE(review): unit (bytes vs. rows) is not visible here -- confirm.
159 int64_t row_group_size = 128 * 1024 * 1024;
160
    /// Defaults to true.
    /// NOTE(review): presumably enables writing column statistics -- confirm.
164 bool write_statistics = true;
165
    /// Data page size, default 1024 * 1024 (= 1048576).
    /// NOTE(review): presumably in bytes -- confirm.
168 int64_t data_page_size = 1024 * 1024;
169};
170
171
184class OPENMS_DLLAPI MSExperimentArrowExport
185{
186public:
197 static std::vector<std::string> getSpectraArrowColumnNames(
198 const MSExperiment& exp,
200
201
209 static std::vector<std::string> getChromatogramArrowColumnNames(
210 const MSExperiment& exp,
212
213
231 const MSExperiment& exp,
232 const ArrowSpectraExportConfig& config,
233 ::ArrowSchema* out_schema,
234 ::ArrowArray* out_array);
235
236
247 const MSExperiment& exp,
248 const ArrowChromatogramExportConfig& config,
249 ::ArrowSchema* out_schema,
250 ::ArrowArray* out_array);
251
252
325 const MSExperiment& exp,
326 const String& filename,
328 const ParquetWriteConfig& parquet_config = ParquetWriteConfig{});
329
330
344 const MSExperiment& exp,
345 const String& filename,
347 const ParquetWriteConfig& parquet_config = ParquetWriteConfig{});
348}; // class MSExperimentArrowExport
349
350} // namespace OpenMS
Export MSExperiment data to Apache Arrow format.
Definition MSExperimentArrowExport.h:185
static bool exportChromatogramsToArrowCDataInterface(const MSExperiment &exp, const ArrowChromatogramExportConfig &config, ::ArrowSchema *out_schema, ::ArrowArray *out_array)
Export chromatograms to Arrow via C Data Interface (zero-copy to Python)
static bool exportSpectraToParquet(const MSExperiment &exp, const String &filename, const ArrowSpectraExportConfig &config=ArrowSpectraExportConfig{}, const ParquetWriteConfig &parquet_config=ParquetWriteConfig{})
Export MSExperiment spectra to Parquet file.
static std::vector< std::string > getChromatogramArrowColumnNames(const MSExperiment &exp, const ArrowChromatogramExportConfig &config=ArrowChromatogramExportConfig{})
Get available column names for chromatogram Arrow export.
static std::vector< std::string > getSpectraArrowColumnNames(const MSExperiment &exp, const ArrowSpectraExportConfig &config=ArrowSpectraExportConfig{})
Get available column names for spectra Arrow export.
static bool exportChromatogramsToParquet(const MSExperiment &exp, const String &filename, const ArrowChromatogramExportConfig &config=ArrowChromatogramExportConfig{}, const ParquetWriteConfig &parquet_config=ParquetWriteConfig{})
Export MSExperiment chromatograms to Parquet file.
static bool exportSpectraToArrowCDataInterface(const MSExperiment &exp, const ArrowSpectraExportConfig &config, ::ArrowSchema *out_schema, ::ArrowArray *out_array)
Export spectra to Arrow via C Data Interface (zero-copy to Python)
In-Memory representation of a mass spectrometry run.
Definition MSExperiment.h:49
A more convenient string class.
Definition String.h:32
Main OpenMS namespace.
Definition openswathalgo/include/OpenMS/OPENSWATHALGO/DATAACCESS/ISpectrumAccess.h:19
std::vector< std::string > columns
Definition MSExperimentArrowExport.h:86
ArrowExportFormat
Format for Arrow export.
Definition MSExperimentArrowExport.h:44
@ Long
One row per peak (default)
@ SemiWide
One row per spectrum with list arrays for mz/intensity.
std::vector< UInt > ms_levels
MS levels to include (empty = all levels)
Definition MSExperimentArrowExport.h:66
Configuration for Arrow export of chromatogram data.
Definition MSExperimentArrowExport.h:105
Configuration for Arrow export of spectra data.
Definition MSExperimentArrowExport.h:61
Configuration for Parquet file writing.
Definition MSExperimentArrowExport.h:136
Compression
Compression algorithm.
Definition MSExperimentArrowExport.h:139