OpenMS
Loading...
Searching...
No Matches
ArrowSchemaRegistry.h
Go to the documentation of this file.
1// Copyright (c) 2002-present, OpenMS Inc. -- EKU Tuebingen, ETH Zurich, and FU Berlin
2// SPDX-License-Identifier: BSD-3-Clause
3//
4// --------------------------------------------------------------------------
5// $Maintainer: Timo Sachsenberg $
6// $Authors: Timo Sachsenberg $
7// --------------------------------------------------------------------------
8
9#pragma once
10
11#include <OpenMS/OpenMSConfig.h>
12#include <memory>
13#include <string>
14#include <vector>
15
16// Forward declarations
17namespace arrow
18{
19 class Schema;
20 class DataType;
21 class Table;
22}
23
24namespace OpenMS
25{
26
28 namespace ArrowSchemaValidation
29 {
31 enum class Mode
32 {
33 Strict,
34 Subset
35 };
36
38 struct OPENMS_DLLAPI ValidationResult
39 {
40 bool valid = true;
41 std::vector<std::string> errors;
42 std::string toString() const;
43 };
44
47 const std::shared_ptr<arrow::Table>& table,
48 const std::shared_ptr<arrow::Schema>& expected_schema,
49 Mode mode = Mode::Strict);
50 }
51
53 struct OPENMS_DLLAPI ProteinSchema
54 {
55 static constexpr const char* ACCESSION = "accession";
56 static constexpr const char* SCORE = "score";
57 static constexpr const char* RANK = "rank";
58 static constexpr const char* COVERAGE = "coverage";
59 static constexpr const char* SEQUENCE = "sequence";
60 static constexpr const char* DESCRIPTION = "description";
61 static constexpr const char* IS_DECOY = "is_decoy";
62 static constexpr const char* RUN_IDENTIFIER = "run_identifier";
63 static constexpr const char* MODIFICATIONS = "modifications";
64 static constexpr const char* METAVALUES = "metavalues";
65
66 static std::shared_ptr<arrow::DataType> modificationsType();
67 static std::shared_ptr<arrow::DataType> metavaluesType();
68 static std::shared_ptr<arrow::Schema> schema();
69 };
70
72 struct OPENMS_DLLAPI ProteinGroupSchema
73 {
74 static constexpr const char* GROUP_TYPE = "group_type";
75 static constexpr const char* PROBABILITY = "probability";
76 static constexpr const char* ACCESSIONS = "accessions";
77 static constexpr const char* RUN_IDENTIFIER = "run_identifier";
78 static constexpr const char* GROUP_INDEX = "group_index";
79 static constexpr const char* FLOAT_DATA = "float_data";
80 static constexpr const char* STRING_DATA = "string_data";
81 static constexpr const char* INTEGER_DATA = "integer_data";
82
83 static std::shared_ptr<arrow::DataType> floatDataType();
84 static std::shared_ptr<arrow::DataType> stringDataType();
85 static std::shared_ptr<arrow::DataType> integerDataType();
86 static std::shared_ptr<arrow::Schema> schema();
87 };
88
90 struct OPENMS_DLLAPI SearchParamsSchema
91 {
92 static constexpr const char* RUN_IDENTIFIER = "run_identifier";
93 static constexpr const char* SEARCH_ENGINE = "search_engine";
94 static constexpr const char* SEARCH_ENGINE_VERSION = "search_engine_version";
95 static constexpr const char* INFERENCE_ENGINE = "inference_engine";
96 static constexpr const char* INFERENCE_ENGINE_VERSION = "inference_engine_version";
97 static constexpr const char* DATE = "date";
98 static constexpr const char* SCORE_TYPE = "score_type";
99 static constexpr const char* HIGHER_SCORE_BETTER = "higher_score_better";
100 static constexpr const char* SIGNIFICANCE_THRESHOLD = "significance_threshold";
101 static constexpr const char* DB = "db";
102 static constexpr const char* DB_VERSION = "db_version";
103 static constexpr const char* TAXONOMY = "taxonomy";
104 static constexpr const char* CHARGES = "charges";
105 static constexpr const char* MASS_TYPE = "mass_type";
106 static constexpr const char* PRECURSOR_MASS_TOLERANCE = "precursor_mass_tolerance";
107 static constexpr const char* PRECURSOR_MASS_TOLERANCE_PPM = "precursor_mass_tolerance_ppm";
108 static constexpr const char* FRAGMENT_MASS_TOLERANCE = "fragment_mass_tolerance";
109 static constexpr const char* FRAGMENT_MASS_TOLERANCE_PPM = "fragment_mass_tolerance_ppm";
110 static constexpr const char* DIGESTION_ENZYME = "digestion_enzyme";
111 static constexpr const char* ENZYME_TERM_SPECIFICITY = "enzyme_term_specificity";
112 static constexpr const char* MISSED_CLEAVAGES = "missed_cleavages";
113 static constexpr const char* FIXED_MODIFICATIONS = "fixed_modifications";
114 static constexpr const char* VARIABLE_MODIFICATIONS = "variable_modifications";
115 static constexpr const char* PRIMARY_MS_RUN_PATHS = "primary_ms_run_paths";
116 static constexpr const char* METAVALUES = "metavalues";
117 static constexpr const char* SP_METAVALUES = "sp_metavalues";
118
119 static std::shared_ptr<arrow::DataType> metavaluesType();
120 static std::shared_ptr<arrow::Schema> schema();
121 };
122
124 struct OPENMS_DLLAPI FeatureSchema
125 {
126 static constexpr const char* UNIQUE_ID = "unique_id";
127 static constexpr const char* PARENT_FEATURE_ID = "parent_feature_id";
128 static constexpr const char* DEPTH = "depth";
129 static constexpr const char* RT = "rt";
130 static constexpr const char* MZ = "mz";
131 static constexpr const char* INTENSITY = "intensity";
132 static constexpr const char* CHARGE = "charge";
133 static constexpr const char* QUALITY = "quality";
134 static constexpr const char* QUALITY_RT = "quality_rt";
135 static constexpr const char* QUALITY_MZ = "quality_mz";
136 static constexpr const char* WIDTH = "width";
137 static constexpr const char* RT_BB_MIN = "rt_bb_min";
138 static constexpr const char* RT_BB_MAX = "rt_bb_max";
139 static constexpr const char* MZ_BB_MIN = "mz_bb_min";
140 static constexpr const char* MZ_BB_MAX = "mz_bb_max";
141 static constexpr const char* CONVEX_HULLS = "convex_hulls";
142 static constexpr const char* METAVALUES = "metavalues";
143
144 static std::shared_ptr<arrow::DataType> convexHullType();
145 static std::shared_ptr<arrow::DataType> metavaluesType();
146 static std::shared_ptr<arrow::Schema> schema();
147 };
148
150 struct OPENMS_DLLAPI ConsensusFeatureSchema
151 {
152 static constexpr const char* UNIQUE_ID = "unique_id";
153 static constexpr const char* RT = "rt";
154 static constexpr const char* MZ = "mz";
155 static constexpr const char* INTENSITY = "intensity";
156 static constexpr const char* CHARGE = "charge";
157 static constexpr const char* QUALITY = "quality";
158 static constexpr const char* WIDTH = "width";
159 static constexpr const char* HANDLES = "handles";
160 static constexpr const char* METAVALUES = "metavalues";
161
162 static std::shared_ptr<arrow::DataType> handlesType();
163 static std::shared_ptr<arrow::DataType> metavaluesType();
164 static std::shared_ptr<arrow::Schema> schema();
165 };
166
168 struct OPENMS_DLLAPI PSMSchema
169 {
170 static constexpr const char* SEQUENCE = "sequence";
171 static constexpr const char* PEPTIDOFORM = "peptidoform";
172 static constexpr const char* MODIFICATIONS = "modifications";
173 static constexpr const char* PRECURSOR_CHARGE = "precursor_charge";
174 static constexpr const char* POSTERIOR_ERROR_PROBABILITY = "posterior_error_probability";
175 static constexpr const char* IS_DECOY = "is_decoy";
176 static constexpr const char* CALCULATED_MZ = "calculated_mz";
177 static constexpr const char* OBSERVED_MZ = "observed_mz";
178 static constexpr const char* ADDITIONAL_SCORES = "additional_scores";
179 static constexpr const char* PROTEIN_ACCESSIONS = "protein_accessions";
180 static constexpr const char* PREDICTED_RT = "predicted_rt";
181 static constexpr const char* REFERENCE_FILE_NAME = "reference_file_name";
182 static constexpr const char* CV_PARAMS = "cv_params";
183 static constexpr const char* SCAN = "scan";
184 static constexpr const char* RT = "rt";
185 static constexpr const char* ION_MOBILITY = "ion_mobility";
186 static constexpr const char* SPECTRUM_REFERENCE = "spectrum_reference";
187 static constexpr const char* SCORE = "score";
188 static constexpr const char* SCORE_TYPE = "score_type";
189 static constexpr const char* HIGHER_SCORE_BETTER = "higher_score_better";
193 static constexpr const char* HIT_INDEX = "hit_index";
194 static constexpr const char* PEPTIDE_IDENTIFICATION_INDEX = "peptide_identification_index";
195 static constexpr const char* PSM_METAVALUES = "psm_metavalues";
196 static constexpr const char* SPECTRUM_METAVALUES = "spectrum_metavalues";
197 static constexpr const char* RUN_IDENTIFIER = "run_identifier";
198 static constexpr const char* MZ_ARRAY = "mz_array";
199 static constexpr const char* INTENSITY_ARRAY = "intensity_array";
200 static constexpr const char* CHARGE_ARRAY = "charge_array";
201 static constexpr const char* ION_TYPE_ARRAY = "ion_type_array";
202
203 static std::shared_ptr<arrow::DataType> modificationsType();
204 static std::shared_ptr<arrow::DataType> additionalScoresType();
205 static std::shared_ptr<arrow::DataType> metavaluesType();
209 static std::shared_ptr<arrow::DataType> proteinAccessionsType();
210 static std::shared_ptr<arrow::Schema> schema();
211 };
212
217 struct OPENMS_DLLAPI QPXPSMSchema
218 {
219 static constexpr const char* SEQUENCE = "sequence";
220 static constexpr const char* PEPTIDOFORM = "peptidoform";
221 static constexpr const char* MODIFICATIONS = "modifications";
222 static constexpr const char* CHARGE = "charge";
223 static constexpr const char* POSTERIOR_ERROR_PROBABILITY = "posterior_error_probability";
224 static constexpr const char* IS_DECOY = "is_decoy";
225 static constexpr const char* CALCULATED_MZ = "calculated_mz";
226 static constexpr const char* OBSERVED_MZ = "observed_mz";
227 static constexpr const char* MASS_ERROR_PPM = "mass_error_ppm";
228 static constexpr const char* ADDITIONAL_SCORES = "additional_scores";
229 static constexpr const char* PREDICTED_RT = "predicted_rt";
230 static constexpr const char* RUN_FILE_NAME = "run_file_name";
231 static constexpr const char* CV_PARAMS = "cv_params";
232 static constexpr const char* SCAN = "scan";
233 static constexpr const char* RT = "rt";
234 static constexpr const char* ION_MOBILITY = "ion_mobility";
235 static constexpr const char* MISSED_CLEAVAGES = "missed_cleavages";
236 static constexpr const char* PROTEIN_ACCESSIONS = "protein_accessions";
237 static constexpr const char* CROSS_LINKS = "cross_links";
238 static constexpr const char* MZ_ARRAY = "mz_array";
239 static constexpr const char* INTENSITY_ARRAY = "intensity_array";
240 static constexpr const char* CHARGE_ARRAY = "charge_array";
241 static constexpr const char* ION_TYPE_ARRAY = "ion_type_array";
242 static constexpr const char* ION_MOBILITY_ARRAY = "ion_mobility_array";
243
245 static std::shared_ptr<arrow::DataType> modificationsType();
247 static std::shared_ptr<arrow::DataType> additionalScoresType();
249 static std::shared_ptr<arrow::DataType> cvParamsType();
251 static std::shared_ptr<arrow::DataType> crossLinksType();
253 static std::shared_ptr<arrow::Schema> schema();
254 };
255
260 struct OPENMS_DLLAPI QPXFeatureSchema
261 {
262 static constexpr const char* SEQUENCE = "sequence";
263 static constexpr const char* PEPTIDOFORM = "peptidoform";
264 static constexpr const char* MODIFICATIONS = "modifications";
265 static constexpr const char* CHARGE = "charge";
266 static constexpr const char* POSTERIOR_ERROR_PROBABILITY = "posterior_error_probability";
267 static constexpr const char* IS_DECOY = "is_decoy";
268 static constexpr const char* CALCULATED_MZ = "calculated_mz";
269 static constexpr const char* OBSERVED_MZ = "observed_mz";
270 static constexpr const char* MASS_ERROR_PPM = "mass_error_ppm";
271 static constexpr const char* ADDITIONAL_SCORES = "additional_scores";
272 static constexpr const char* PREDICTED_RT = "predicted_rt";
273 static constexpr const char* RUN_FILE_NAME = "run_file_name";
274 static constexpr const char* CV_PARAMS = "cv_params";
275 static constexpr const char* SCAN = "scan";
276 static constexpr const char* RT = "rt";
277 static constexpr const char* ION_MOBILITY = "ion_mobility";
278 static constexpr const char* MISSED_CLEAVAGES = "missed_cleavages";
279 static constexpr const char* INTENSITIES = "intensities";
280 static constexpr const char* ADDITIONAL_INTENSITIES = "additional_intensities";
281 static constexpr const char* PG_ACCESSIONS = "pg_accessions";
282 static constexpr const char* ANCHOR_PROTEIN = "anchor_protein";
283 static constexpr const char* UNIQUE = "unique";
284 static constexpr const char* PG_GLOBAL_QVALUE = "pg_global_qvalue";
285 static constexpr const char* PG_POSITIONS = "pg_positions";
286 static constexpr const char* ION_MOBILITY_START = "ion_mobility_start";
287 static constexpr const char* ION_MOBILITY_STOP = "ion_mobility_stop";
288 static constexpr const char* GG_ACCESSIONS = "gg_accessions";
289 static constexpr const char* GG_NAMES = "gg_names";
290 static constexpr const char* ID_RUN_FILE_NAME = "id_run_file_name";
291 static constexpr const char* RT_START = "rt_start";
292 static constexpr const char* RT_STOP = "rt_stop";
293
295 static std::shared_ptr<arrow::DataType> modificationsType();
297 static std::shared_ptr<arrow::DataType> additionalScoresType();
299 static std::shared_ptr<arrow::DataType> cvParamsType();
301 static std::shared_ptr<arrow::DataType> intensitiesType();
303 static std::shared_ptr<arrow::DataType> additionalIntensitiesType();
305 static std::shared_ptr<arrow::DataType> pgAccessionsType();
307 static std::shared_ptr<arrow::DataType> pgPositionsType();
309 static std::shared_ptr<arrow::Schema> schema();
310 };
311
317 struct OPENMS_DLLAPI QPXPgSchema
318 {
319 static constexpr const char* PG_ACCESSIONS = "pg_accessions";
320 static constexpr const char* PG_NAMES = "pg_names";
321 static constexpr const char* GG_ACCESSIONS = "gg_accessions";
322 static constexpr const char* GG_NAMES = "gg_names";
323 static constexpr const char* GG_QVALUE = "gg_qvalue";
324 static constexpr const char* ANCHOR_PROTEIN = "anchor_protein";
325 static constexpr const char* RUN_FILE_NAME = "run_file_name";
326 static constexpr const char* GLOBAL_QVALUE = "global_qvalue";
327 static constexpr const char* PG_QVALUE = "pg_qvalue";
328 static constexpr const char* INTENSITIES = "intensities";
329 static constexpr const char* ADDITIONAL_INTENSITIES = "additional_intensities";
330 static constexpr const char* IS_DECOY = "is_decoy";
331 static constexpr const char* CONTAMINANT = "contaminant";
332 static constexpr const char* PEPTIDES = "peptides";
333 static constexpr const char* PEPTIDE_COUNTS = "peptide_counts";
334 static constexpr const char* FEATURE_COUNTS = "feature_counts";
335 static constexpr const char* SEQUENCE_COVERAGE = "sequence_coverage";
336 static constexpr const char* MOLECULAR_WEIGHT = "molecular_weight";
337 static constexpr const char* ADDITIONAL_SCORES = "additional_scores";
338 static constexpr const char* CV_PARAMS = "cv_params";
339
341 static std::shared_ptr<arrow::DataType> intensitiesType();
343 static std::shared_ptr<arrow::DataType> additionalIntensitiesType();
345 static std::shared_ptr<arrow::DataType> peptidesType();
347 static std::shared_ptr<arrow::DataType> peptideCountsType();
349 static std::shared_ptr<arrow::DataType> featureCountsType();
351 static std::shared_ptr<arrow::DataType> additionalScoresType();
353 static std::shared_ptr<arrow::DataType> cvParamsType();
355 static std::shared_ptr<arrow::Schema> schema();
356 };
357
359 struct OPENMS_DLLAPI SpectraLongSchema
360 {
361 static constexpr const char* MZ = "mz";
362 static constexpr const char* INTENSITY = "intensity";
363 static constexpr const char* RT = "rt";
364 static constexpr const char* ION_MOBILITY = "ion_mobility";
365 static constexpr const char* SPECTRUM_INDEX = "spectrum_index";
366 static constexpr const char* MS_LEVEL = "ms_level";
367 static constexpr const char* NATIVE_ID = "native_id";
368 static constexpr const char* PRECURSOR_MZ = "precursor_mz";
369 static constexpr const char* PRECURSOR_CHARGE = "precursor_charge";
370 static constexpr const char* PRECURSOR_INTENSITY = "precursor_intensity";
371 static constexpr const char* ISOLATION_LOWER = "isolation_lower";
372 static constexpr const char* ISOLATION_UPPER = "isolation_upper";
373
374 static std::shared_ptr<arrow::Schema> schema();
375 };
376
378 struct OPENMS_DLLAPI SpectraSemiWideSchema
379 {
380 static constexpr const char* SPECTRUM_INDEX = "spectrum_index";
381 static constexpr const char* RT = "rt";
382 static constexpr const char* MS_LEVEL = "ms_level";
383 static constexpr const char* NATIVE_ID = "native_id";
384 static constexpr const char* MZ = "mz";
385 static constexpr const char* INTENSITY = "intensity";
386 static constexpr const char* ION_MOBILITY = "ion_mobility";
387 static constexpr const char* PRECURSOR_MZ = "precursor_mz";
388 static constexpr const char* PRECURSOR_CHARGE = "precursor_charge";
389 static constexpr const char* PRECURSOR_INTENSITY = "precursor_intensity";
390 static constexpr const char* ISOLATION_LOWER = "isolation_lower";
391 static constexpr const char* ISOLATION_UPPER = "isolation_upper";
392
393 static std::shared_ptr<arrow::Schema> schema();
394 };
395
397 struct OPENMS_DLLAPI ChromatogramSchema
398 {
399 static constexpr const char* RT = "rt";
400 static constexpr const char* INTENSITY = "intensity";
401 static constexpr const char* CHROMATOGRAM_INDEX = "chromatogram_index";
402 static constexpr const char* NATIVE_ID = "native_id";
403 static constexpr const char* PRECURSOR_MZ = "precursor_mz";
404 static constexpr const char* PRODUCT_MZ = "product_mz";
405
406 static std::shared_ptr<arrow::Schema> schema();
407 };
408
410 struct OPENMS_DLLAPI ChromatogramSemiWideSchema
411 {
412 static constexpr const char* CHROMATOGRAM_INDEX = "chromatogram_index";
413 static constexpr const char* NATIVE_ID = "native_id";
414 static constexpr const char* RT = "rt";
415 static constexpr const char* INTENSITY = "intensity";
416 static constexpr const char* PRECURSOR_MZ = "precursor_mz";
417 static constexpr const char* PRODUCT_MZ = "product_mz";
418
419 static std::shared_ptr<arrow::Schema> schema();
420 };
421
423 struct OPENMS_DLLAPI OSWPrecursorSchema
424 {
425 static constexpr const char* PRECURSOR_ID = "precursor_id";
426 static constexpr const char* PRECURSOR_MZ = "precursor_mz";
427 static constexpr const char* CHARGE = "charge";
428 static constexpr const char* LIBRARY_RT = "library_rt";
429 static constexpr const char* LIBRARY_DRIFT_TIME = "library_drift_time";
430 static constexpr const char* DECOY = "decoy";
431 static constexpr const char* TRAML_ID = "traml_id";
432 static constexpr const char* MODIFIED_SEQUENCE = "modified_sequence";
433 static constexpr const char* UNMODIFIED_SEQUENCE = "unmodified_sequence";
434 static constexpr const char* PROTEIN_ACCESSIONS = "protein_accessions";
435
436 static std::shared_ptr<arrow::Schema> schema();
437 };
438
440 struct OPENMS_DLLAPI OSWTransitionSchema
441 {
442 static constexpr const char* TRANSITION_ID = "transition_id";
443 static constexpr const char* PRECURSOR_ID = "precursor_id";
444 static constexpr const char* TRAML_ID = "traml_id";
445 static constexpr const char* PRODUCT_MZ = "product_mz";
446 static constexpr const char* CHARGE = "charge";
447 static constexpr const char* TYPE = "type";
448 static constexpr const char* ANNOTATION = "annotation";
449 static constexpr const char* ORDINAL = "ordinal";
450 static constexpr const char* DETECTING = "detecting";
451 static constexpr const char* IDENTIFYING = "identifying";
452 static constexpr const char* QUANTIFYING = "quantifying";
453 static constexpr const char* LIBRARY_INTENSITY = "library_intensity";
454 static constexpr const char* DECOY = "decoy";
455
456 static std::shared_ptr<arrow::Schema> schema();
457 };
458
460 struct OPENMS_DLLAPI OSWFeaturePrecursorSchema
461 {
462 static constexpr const char* FEATURE_ID = "feature_id";
463 static constexpr const char* RUN_ID = "run_id";
464 static constexpr const char* PRECURSOR_ISOTOPE = "precursor_isotope";
465 static constexpr const char* PRECURSOR_AREA_INTENSITY = "precursor_area_intensity";
466 static constexpr const char* PRECURSOR_APEX_INTENSITY = "precursor_apex_intensity";
467
468 static std::shared_ptr<arrow::Schema> schema();
469 };
470
472 struct OPENMS_DLLAPI OSWRunSchema
473 {
474 static constexpr const char* RUN_ID = "run_id";
475 static constexpr const char* FILENAME = "filename";
476
477 static std::shared_ptr<arrow::Schema> schema();
478 };
479
481 struct OPENMS_DLLAPI OSWFeatureSchema
482 {
483 static constexpr const char* FEATURE_ID = "feature_id";
484 static constexpr const char* RUN_ID = "run_id";
485 static constexpr const char* PRECURSOR_ID = "precursor_id";
486 static constexpr const char* EXP_RT = "exp_rt";
487 static constexpr const char* EXP_IM = "exp_im";
488 static constexpr const char* NORM_RT = "norm_rt";
489 static constexpr const char* DELTA_RT = "delta_rt";
490 static constexpr const char* LEFT_WIDTH = "left_width";
491 static constexpr const char* RIGHT_WIDTH = "right_width";
492 static constexpr const char* EXP_IM_LEFTWIDTH = "exp_im_leftwidth";
493 static constexpr const char* EXP_IM_RIGHTWIDTH = "exp_im_rightwidth";
494 static constexpr const char* MS1_AREA_INTENSITY = "ms1_area_intensity";
495 static constexpr const char* MS1_APEX_INTENSITY = "ms1_apex_intensity";
496 static constexpr const char* MS1_EXP_IM = "ms1_exp_im";
497 static constexpr const char* MS1_DELTA_IM = "ms1_delta_im";
498 static constexpr const char* VAR_MS1_MASSDEV_SCORE = "var_ms1_massdev_score";
499 static constexpr const char* VAR_MS1_IM_MS1_DELTA_SCORE = "var_ms1_im_ms1_delta_score";
500 static constexpr const char* VAR_MS1_MI_SCORE = "var_ms1_mi_score";
501 static constexpr const char* VAR_MS1_MI_CONTRAST_SCORE = "var_ms1_mi_contrast_score";
502 static constexpr const char* VAR_MS1_MI_COMBINED_SCORE = "var_ms1_mi_combined_score";
503 static constexpr const char* VAR_MS1_ISOTOPE_CORRELATION_SCORE = "var_ms1_isotope_correlation_score";
504 static constexpr const char* VAR_MS1_ISOTOPE_OVERLAP_SCORE = "var_ms1_isotope_overlap_score";
505 static constexpr const char* VAR_MS1_XCORR_COELUTION = "var_ms1_xcorr_coelution";
506 static constexpr const char* VAR_MS1_XCORR_COELUTION_CONTRAST = "var_ms1_xcorr_coelution_contrast";
507 static constexpr const char* VAR_MS1_XCORR_COELUTION_COMBINED = "var_ms1_xcorr_coelution_combined";
508 static constexpr const char* VAR_MS1_XCORR_SHAPE = "var_ms1_xcorr_shape";
509 static constexpr const char* VAR_MS1_XCORR_SHAPE_CONTRAST = "var_ms1_xcorr_shape_contrast";
510 static constexpr const char* VAR_MS1_XCORR_SHAPE_COMBINED = "var_ms1_xcorr_shape_combined";
511 static constexpr const char* MS2_AREA_INTENSITY = "ms2_area_intensity";
512 static constexpr const char* MS2_TOTAL_AREA_INTENSITY = "ms2_total_area_intensity";
513 static constexpr const char* MS2_APEX_INTENSITY = "ms2_apex_intensity";
514 static constexpr const char* MS2_EXP_IM = "ms2_exp_im";
515 static constexpr const char* MS2_EXP_IM_LEFTWIDTH = "ms2_exp_im_leftwidth";
516 static constexpr const char* MS2_EXP_IM_RIGHTWIDTH = "ms2_exp_im_rightwidth";
517 static constexpr const char* MS2_DELTA_IM = "ms2_delta_im";
518 static constexpr const char* MS2_TOTAL_MI = "ms2_total_mi";
519 static constexpr const char* VAR_MS2_BSERIES_SCORE = "var_ms2_bseries_score";
520 static constexpr const char* VAR_MS2_DOTPROD_SCORE = "var_ms2_dotprod_score";
521 static constexpr const char* VAR_MS2_INTENSITY_SCORE = "var_ms2_intensity_score";
522 static constexpr const char* VAR_MS2_ISOTOPE_CORRELATION_SCORE = "var_ms2_isotope_correlation_score";
523 static constexpr const char* VAR_MS2_ISOTOPE_OVERLAP_SCORE = "var_ms2_isotope_overlap_score";
524 static constexpr const char* VAR_MS2_LIBRARY_CORR = "var_ms2_library_corr";
525 static constexpr const char* VAR_MS2_LIBRARY_DOTPROD = "var_ms2_library_dotprod";
526 static constexpr const char* VAR_MS2_LIBRARY_MANHATTAN = "var_ms2_library_manhattan";
527 static constexpr const char* VAR_MS2_LIBRARY_RMSD = "var_ms2_library_rmsd";
528 static constexpr const char* VAR_MS2_LIBRARY_ROOTMEANSQUARE = "var_ms2_library_rootmeansquare";
529 static constexpr const char* VAR_MS2_LIBRARY_SANGLE = "var_ms2_library_sangle";
530 static constexpr const char* VAR_MS2_LOG_SN_SCORE = "var_ms2_log_sn_score";
531 static constexpr const char* VAR_MS2_MANHATTAN_SCORE = "var_ms2_manhattan_score";
532 static constexpr const char* VAR_MS2_MASSDEV_SCORE = "var_ms2_massdev_score";
533 static constexpr const char* VAR_MS2_MASSDEV_SCORE_WEIGHTED = "var_ms2_massdev_score_weighted";
534 static constexpr const char* VAR_MS2_MI_SCORE = "var_ms2_mi_score";
535 static constexpr const char* VAR_MS2_MI_WEIGHTED_SCORE = "var_ms2_mi_weighted_score";
536 static constexpr const char* VAR_MS2_MI_RATIO_SCORE = "var_ms2_mi_ratio_score";
537 static constexpr const char* VAR_MS2_NORM_RT_SCORE = "var_ms2_norm_rt_score";
538 static constexpr const char* VAR_MS2_XCORR_COELUTION = "var_ms2_xcorr_coelution";
539 static constexpr const char* VAR_MS2_XCORR_COELUTION_WEIGHTED = "var_ms2_xcorr_coelution_weighted";
540 static constexpr const char* VAR_MS2_XCORR_SHAPE = "var_ms2_xcorr_shape";
541 static constexpr const char* VAR_MS2_XCORR_SHAPE_WEIGHTED = "var_ms2_xcorr_shape_weighted";
542 static constexpr const char* VAR_MS2_YSERIES_SCORE = "var_ms2_yseries_score";
543 static constexpr const char* VAR_MS2_ELUTION_MODEL_FIT_SCORE = "var_ms2_elution_model_fit_score";
544 static constexpr const char* VAR_MS2_IM_XCORR_SHAPE = "var_ms2_im_xcorr_shape";
545 static constexpr const char* VAR_MS2_IM_XCORR_COELUTION = "var_ms2_im_xcorr_coelution";
546 static constexpr const char* VAR_MS2_IM_DELTA_SCORE = "var_ms2_im_delta_score";
547 static constexpr const char* VAR_MS2_IM_LOG_INTENSITY = "var_ms2_im_log_intensity";
548
549 static std::shared_ptr<arrow::Schema> schema();
550 };
551
553 struct OPENMS_DLLAPI OSWFeatureTransitionSchema
554 {
555 static constexpr const char* FEATURE_ID = "feature_id";
556 static constexpr const char* RUN_ID = "run_id";
557 static constexpr const char* TRANSITION_ID = "transition_id";
558 static constexpr const char* AREA_INTENSITY = "area_intensity";
559 static constexpr const char* TOTAL_AREA_INTENSITY = "total_area_intensity";
560 static constexpr const char* APEX_INTENSITY = "apex_intensity";
561 static constexpr const char* APEX_RT = "apex_rt";
562 static constexpr const char* RT_FWHM = "rt_fwhm";
563 static constexpr const char* MASSERROR_PPM = "masserror_ppm";
564 static constexpr const char* TOTAL_MI = "total_mi";
565 static constexpr const char* VAR_INTENSITY_SCORE = "var_intensity_score";
566 static constexpr const char* VAR_INTENSITY_RATIO_SCORE = "var_intensity_ratio_score";
567 static constexpr const char* VAR_LOG_INTENSITY = "var_log_intensity";
568 static constexpr const char* VAR_XCORR_COELUTION = "var_xcorr_coelution";
569 static constexpr const char* VAR_XCORR_SHAPE = "var_xcorr_shape";
570 static constexpr const char* VAR_LOG_SN_SCORE = "var_log_sn_score";
571 static constexpr const char* VAR_MASSDEV_SCORE = "var_massdev_score";
572 static constexpr const char* VAR_MI_SCORE = "var_mi_score";
573 static constexpr const char* VAR_MI_RATIO_SCORE = "var_mi_ratio_score";
574 static constexpr const char* VAR_ISOTOPE_CORRELATION_SCORE = "var_isotope_correlation_score";
575 static constexpr const char* VAR_ISOTOPE_OVERLAP_SCORE = "var_isotope_overlap_score";
576 static constexpr const char* EXP_IM = "exp_im";
577 static constexpr const char* EXP_IM_LEFTWIDTH = "exp_im_leftwidth";
578 static constexpr const char* EXP_IM_RIGHTWIDTH = "exp_im_rightwidth";
579 static constexpr const char* DELTA_IM = "delta_im";
580 static constexpr const char* VAR_IM_DELTA_SCORE = "var_im_delta_score";
581 static constexpr const char* VAR_IM_LOG_INTENSITY = "var_im_log_intensity";
582 static constexpr const char* VAR_IM_XCORR_COELUTION_CONTRAST = "var_im_xcorr_coelution_contrast";
583 static constexpr const char* VAR_IM_XCORR_SHAPE_CONTRAST = "var_im_xcorr_shape_contrast";
584 static constexpr const char* VAR_IM_XCORR_COELUTION_COMBINED = "var_im_xcorr_coelution_combined";
585 static constexpr const char* VAR_IM_XCORR_SHAPE_COMBINED = "var_im_xcorr_shape_combined";
586 static constexpr const char* START_POSITION_AT_5 = "start_position_at_5";
587 static constexpr const char* END_POSITION_AT_5 = "end_position_at_5";
588 static constexpr const char* START_POSITION_AT_10 = "start_position_at_10";
589 static constexpr const char* END_POSITION_AT_10 = "end_position_at_10";
590 static constexpr const char* START_POSITION_AT_50 = "start_position_at_50";
591 static constexpr const char* END_POSITION_AT_50 = "end_position_at_50";
592 static constexpr const char* TOTAL_WIDTH = "total_width";
593 static constexpr const char* TAILING_FACTOR = "tailing_factor";
594 static constexpr const char* ASYMMETRY_FACTOR = "asymmetry_factor";
595 static constexpr const char* SLOPE_OF_BASELINE = "slope_of_baseline";
596 static constexpr const char* BASELINE_DELTA_2_HEIGHT = "baseline_delta_2_height";
597 static constexpr const char* POINTS_ACROSS_BASELINE = "points_across_baseline";
598 static constexpr const char* POINTS_ACROSS_HALF_HEIGHT = "points_across_half_height";
599
600 static std::shared_ptr<arrow::Schema> schema();
601 };
602
604 struct OPENMS_DLLAPI XICSchema
605 {
606 static constexpr const char* RUN_ID = "RUN_ID";
607 static constexpr const char* SOURCE_FILE = "SOURCE_FILE";
608 static constexpr const char* MS_LEVEL = "MS_LEVEL";
609 static constexpr const char* PRECURSOR_ID = "PRECURSOR_ID";
610 static constexpr const char* TRANSITION_ID = "TRANSITION_ID";
611 static constexpr const char* MODIFIED_SEQUENCE = "MODIFIED_SEQUENCE";
612 static constexpr const char* PRECURSOR_CHARGE = "PRECURSOR_CHARGE";
613 static constexpr const char* PRODUCT_CHARGE = "PRODUCT_CHARGE";
614 static constexpr const char* DETECTING_TRANSITION = "DETECTING_TRANSITION";
615 static constexpr const char* PRECURSOR_DECOY = "PRECURSOR_DECOY";
616 static constexpr const char* PRODUCT_DECOY = "PRODUCT_DECOY";
617 static constexpr const char* TRANSITION_ORDINAL = "TRANSITION_ORDINAL";
618 static constexpr const char* TRANSITION_TYPE = "TRANSITION_TYPE";
619 static constexpr const char* ANNOTATION = "ANNOTATION";
620 static constexpr const char* RT_DATA = "RT_DATA";
621 static constexpr const char* INTENSITY_DATA = "INTENSITY_DATA";
622 static constexpr const char* RT_COMPRESSION = "RT_COMPRESSION";
623 static constexpr const char* INTENSITY_COMPRESSION = "INTENSITY_COMPRESSION";
624
625 static std::shared_ptr<arrow::Schema> schema();
626 };
627
629 struct OPENMS_DLLAPI XIMSchema
630 {
631 static constexpr const char* RUN_ID = "RUN_ID";
632 static constexpr const char* SOURCE_FILE = "SOURCE_FILE";
633 static constexpr const char* MS_LEVEL = "MS_LEVEL";
634 static constexpr const char* MOBILOGRAM_TYPE = "MOBILOGRAM_TYPE";
635 static constexpr const char* PRECURSOR_ID = "PRECURSOR_ID";
636 static constexpr const char* TRANSITION_ID = "TRANSITION_ID";
637 static constexpr const char* FEATURE_ID = "FEATURE_ID";
638 static constexpr const char* FEATURE_RT = "FEATURE_RT";
639 static constexpr const char* MODIFIED_SEQUENCE = "MODIFIED_SEQUENCE";
640 static constexpr const char* PRECURSOR_CHARGE = "PRECURSOR_CHARGE";
641 static constexpr const char* PRODUCT_CHARGE = "PRODUCT_CHARGE";
642 static constexpr const char* DETECTING_TRANSITION = "DETECTING_TRANSITION";
643 static constexpr const char* PRECURSOR_DECOY = "PRECURSOR_DECOY";
644 static constexpr const char* PRODUCT_DECOY = "PRODUCT_DECOY";
645 static constexpr const char* TRANSITION_ORDINAL = "TRANSITION_ORDINAL";
646 static constexpr const char* TRANSITION_TYPE = "TRANSITION_TYPE";
647 static constexpr const char* ANNOTATION = "ANNOTATION";
648 static constexpr const char* MOBILITY_DATA = "MOBILITY_DATA";
649 static constexpr const char* INTENSITY_DATA = "INTENSITY_DATA";
650 static constexpr const char* MOBILITY_COMPRESSION = "MOBILITY_COMPRESSION";
651 static constexpr const char* INTENSITY_COMPRESSION = "INTENSITY_COMPRESSION";
652
653 static std::shared_ptr<arrow::Schema> schema();
654 };
655
656} // namespace OpenMS
ValidationResult validate(const std::shared_ptr< arrow::Table > &table, const std::shared_ptr< arrow::Schema > &expected_schema, Mode mode=Mode::Strict)
Validate an Arrow table's schema against an expected schema.
Mode
Validation strictness: Strict requires exact match, Subset allows missing and extra columns.
Definition ArrowSchemaRegistry.h:32
Main OpenMS namespace.
Definition openswathalgo/include/OpenMS/OPENSWATHALGO/DATAACCESS/ISpectrumAccess.h:19
RT
RT in seconds.
Definition ArrowIOHelpers.h:22
Result of schema validation containing validity flag and error messages.
Definition ArrowSchemaRegistry.h:39
std::vector< std::string > errors
Definition ArrowSchemaRegistry.h:41
Schema for chromatograms in long (one row per data point) format.
Definition ArrowSchemaRegistry.h:398
static std::shared_ptr< arrow::Schema > schema()
Schema for chromatograms in semi-wide (one row per chromatogram, list columns) format.
Definition ArrowSchemaRegistry.h:411
static std::shared_ptr< arrow::Schema > schema()
Schema for consensus feature table (ConsensusMap features)
Definition ArrowSchemaRegistry.h:151
static std::shared_ptr< arrow::Schema > schema()
static std::shared_ptr< arrow::DataType > handlesType()
static std::shared_ptr< arrow::DataType > metavaluesType()
Schema for LC-MS feature table (FeatureMap features)
Definition ArrowSchemaRegistry.h:125
static std::shared_ptr< arrow::Schema > schema()
static std::shared_ptr< arrow::DataType > metavaluesType()
static std::shared_ptr< arrow::DataType > convexHullType()
Schema for OpenSWATH feature-level precursor intensity table.
Definition ArrowSchemaRegistry.h:461
static std::shared_ptr< arrow::Schema > schema()
Schema for OpenSWATH feature scoring results table.
Definition ArrowSchemaRegistry.h:482
static std::shared_ptr< arrow::Schema > schema()
Schema for OpenSWATH per-transition feature scoring results table.
Definition ArrowSchemaRegistry.h:554
static std::shared_ptr< arrow::Schema > schema()
Schema for OpenSWATH precursor (peptide query) table.
Definition ArrowSchemaRegistry.h:424
static std::shared_ptr< arrow::Schema > schema()
Schema for OpenSWATH run metadata table.
Definition ArrowSchemaRegistry.h:473
static std::shared_ptr< arrow::Schema > schema()
Schema for OpenSWATH transition (fragment ion) table.
Definition ArrowSchemaRegistry.h:441
static std::shared_ptr< arrow::Schema > schema()
Schema for peptide-spectrum match (PSM) results table.
Definition ArrowSchemaRegistry.h:169
static std::shared_ptr< arrow::Schema > schema()
static std::shared_ptr< arrow::DataType > additionalScoresType()
static std::shared_ptr< arrow::DataType > modificationsType()
static std::shared_ptr< arrow::DataType > metavaluesType()
static std::shared_ptr< arrow::DataType > proteinAccessionsType()
Arrow type for protein_accessions: list<struct{accession, aa_before, aa_after, start, ...}>
Schema for protein group (indistinguishable group) results table.
Definition ArrowSchemaRegistry.h:73
static std::shared_ptr< arrow::Schema > schema()
static std::shared_ptr< arrow::DataType > floatDataType()
static std::shared_ptr< arrow::DataType > stringDataType()
static std::shared_ptr< arrow::DataType > integerDataType()
Schema for protein identification results table.
Definition ArrowSchemaRegistry.h:54
static std::shared_ptr< arrow::Schema > schema()
static std::shared_ptr< arrow::DataType > modificationsType()
static std::shared_ptr< arrow::DataType > metavaluesType()
Schema for QPX feature view (quantms Parquet eXchange format)
Definition ArrowSchemaRegistry.h:261
static std::shared_ptr< arrow::Schema > schema()
Complete Arrow schema for QPX feature table (31 fields)
static std::shared_ptr< arrow::DataType > pgAccessionsType()
Arrow type for protein group accessions: list<struct{accession, start, end, pre, post}>
static std::shared_ptr< arrow::DataType > additionalScoresType()
Arrow type for additional scores (delegates to QPXPSMSchema::additionalScoresType)
static std::shared_ptr< arrow::DataType > additionalIntensitiesType()
Arrow type for additional intensities: list<struct{label, intensities: list<struct{...}>}>
static std::shared_ptr< arrow::DataType > intensitiesType()
Arrow type for intensities: list<struct{label, intensity}>
static std::shared_ptr< arrow::DataType > modificationsType()
Arrow type for modifications (delegates to QPXPSMSchema::modificationsType)
static std::shared_ptr< arrow::DataType > cvParamsType()
Arrow type for CV params (delegates to QPXPSMSchema::cvParamsType)
static std::shared_ptr< arrow::DataType > pgPositionsType()
Arrow type for protein group positions: list<struct{protein_accession, start, end}>
Schema for QPX PSM export (quantms Parquet eXchange format, PSM table)
Definition ArrowSchemaRegistry.h:218
static std::shared_ptr< arrow::Schema > schema()
Complete Arrow schema for QPX PSM table (24 fields)
static std::shared_ptr< arrow::DataType > crossLinksType()
Arrow type for cross-links: list<struct{xl_type, partner_sequence, ...}>
static std::shared_ptr< arrow::DataType > additionalScoresType()
Arrow type for additional scores: list<struct{score_name, score_value, higher_better}>
static std::shared_ptr< arrow::DataType > modificationsType()
Arrow type for modifications: list<struct{name, accession, positions: list<struct{position, ...}>}>
static std::shared_ptr< arrow::DataType > cvParamsType()
Arrow type for CV params: list<struct{cv_name, cv_value}>
Schema for QPX protein group export (quantms Parquet eXchange format, pg table)
Definition ArrowSchemaRegistry.h:318
static std::shared_ptr< arrow::Schema > schema()
Complete Arrow schema for QPX pg table (20 fields)
static std::shared_ptr< arrow::DataType > additionalScoresType()
Arrow type for additional scores (delegates to QPXPSMSchema::additionalScoresType)
static std::shared_ptr< arrow::DataType > additionalIntensitiesType()
Arrow type for additional intensities: list<struct{label, intensities: list<struct{...}>}>
static std::shared_ptr< arrow::DataType > intensitiesType()
Arrow type for intensities: list<struct{label, intensity}> (nullable for search-engine output)
static std::shared_ptr< arrow::DataType > peptidesType()
Arrow type for peptides: list<struct{protein_name, peptide_count}>
static std::shared_ptr< arrow::DataType > cvParamsType()
Arrow type for CV params (delegates to QPXPSMSchema::cvParamsType)
static std::shared_ptr< arrow::DataType > featureCountsType()
Arrow type for feature_counts: struct{unique_features, total_features}.
static std::shared_ptr< arrow::DataType > peptideCountsType()
Arrow type for peptide_counts: struct{unique_sequences, total_sequences}.
Schema for search engine parameters and settings table.
Definition ArrowSchemaRegistry.h:91
static std::shared_ptr< arrow::Schema > schema()
static std::shared_ptr< arrow::DataType > metavaluesType()
Schema for spectra in long (one row per peak) format.
Definition ArrowSchemaRegistry.h:360
static std::shared_ptr< arrow::Schema > schema()
Schema for spectra in semi-wide (one row per spectrum, list columns for peaks) format.
Definition ArrowSchemaRegistry.h:379
static std::shared_ptr< arrow::Schema > schema()
Schema for extracted ion chromatogram (XIC) data table.
Definition ArrowSchemaRegistry.h:605
static std::shared_ptr< arrow::Schema > schema()
Schema for extracted ion mobilogram (XIM) data table.
Definition ArrowSchemaRegistry.h:630
static std::shared_ptr< arrow::Schema > schema()