ORE Studio 0.0.4
Loading...
Searching...
No Matches
dataset.hpp
/* -*- mode: c++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*-
 *
 * Copyright (C) 2025 Marco Craveiro <marco.craveiro@gmail.com>
 *
 * This program is free software; you can redistribute it and/or modify it under
 * the terms of the GNU General Public License as published by the Free Software
 * Foundation; either version 3 of the License, or (at your option) any later
 * version.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
 * details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc., 51
 * Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 *
 */
20#ifndef ORES_DQ_DOMAIN_DATASET_HPP
21#define ORES_DQ_DOMAIN_DATASET_HPP
22
23#include <chrono>
24#include <optional>
25#include <string>
26#include <boost/uuid/uuid.hpp>
27
28namespace ores::dq::domain {
29
36struct dataset final {
40 int version = 0;
41
47 boost::uuids::uuid id;
48
55 std::string code;
56
62 std::optional<std::string> catalog_name;
63
69 std::string subject_area_name;
70
76 std::string domain_name;
77
83 std::optional<std::string> coding_scheme_code;
84
90 std::string origin_code;
91
97 std::string nature_code;
98
104 std::string treatment_code;
105
111 std::optional<boost::uuids::uuid> methodology_id;
112
116 std::string name;
117
121 std::string description;
122
126 std::string source_system_id;
127
131 std::string business_context;
132
138 std::optional<boost::uuids::uuid> upstream_derivation_id;
139
146
152 std::chrono::system_clock::time_point as_of_date;
153
157 std::chrono::system_clock::time_point ingestion_timestamp;
158
162 std::optional<std::string> license_info;
163
170 std::optional<std::string> artefact_type;
171
175 std::string recorded_by;
176
180 std::string change_commentary;
181
185 std::chrono::system_clock::time_point recorded_at;
186};
187
188}
189
190#endif
Represents a data quality dataset with lineage tracking.
Definition dataset.hpp:36
std::string domain_name
Data domain this dataset applies to.
Definition dataset.hpp:76
std::optional< std::string > catalog_name
Optional catalog this dataset belongs to.
Definition dataset.hpp:62
int lineage_depth
Depth in the derivation chain from the original source.
Definition dataset.hpp:145
std::string change_commentary
Free-text commentary explaining the change.
Definition dataset.hpp:180
std::string description
Detailed description of the dataset's contents and purpose.
Definition dataset.hpp:121
std::string code
Unique code for stable referencing.
Definition dataset.hpp:55
std::optional< boost::uuids::uuid > methodology_id
Optional methodology used to produce this dataset.
Definition dataset.hpp:111
std::string origin_code
Code indicating the origin of the data.
Definition dataset.hpp:90
std::string source_system_id
Identifier of the source system where data originated.
Definition dataset.hpp:126
std::chrono::system_clock::time_point as_of_date
Business date the data represents.
Definition dataset.hpp:152
std::chrono::system_clock::time_point recorded_at
Timestamp when this version of the record was recorded.
Definition dataset.hpp:185
std::string nature_code
Code indicating the nature of the data.
Definition dataset.hpp:97
std::optional< std::string > coding_scheme_code
Optional coding scheme used for identifiers in this dataset.
Definition dataset.hpp:83
std::string name
Human-readable name for the dataset.
Definition dataset.hpp:116
std::string recorded_by
Username of the person who last modified this dataset.
Definition dataset.hpp:175
boost::uuids::uuid id
UUID uniquely identifying this dataset.
Definition dataset.hpp:47
std::string business_context
Business context describing the dataset's role and usage.
Definition dataset.hpp:131
int version
Version number for optimistic locking and change tracking.
Definition dataset.hpp:40
std::string subject_area_name
Subject area this dataset belongs to.
Definition dataset.hpp:69
std::string treatment_code
Code indicating how the data was treated or processed.
Definition dataset.hpp:104
std::optional< std::string > artefact_type
Type of artefact this dataset populates.
Definition dataset.hpp:170
std::optional< std::string > license_info
Optional license information for the data.
Definition dataset.hpp:162
std::chrono::system_clock::time_point ingestion_timestamp
Timestamp when the data was ingested into the system.
Definition dataset.hpp:157
std::optional< boost::uuids::uuid > upstream_derivation_id
Optional reference to an upstream dataset this was derived from.
Definition dataset.hpp:138