ORE Studio 0.0.4
Loading...
Searching...
No Matches
dataset.hpp
1/* -*- mode: c++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*-
2 *
3 * Copyright (C) 2025 Marco Craveiro <marco.craveiro@gmail.com>
4 *
5 * This program is free software; you can redistribute it and/or modify it under
6 * the terms of the GNU General Public License as published by the Free Software
7 * Foundation; either version 3 of the License, or (at your option) any later
8 * version.
9 *
10 * This program is distributed in the hope that it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12 * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
13 * details.
14 *
15 * You should have received a copy of the GNU General Public License along with
16 * this program; if not, write to the Free Software Foundation, Inc., 51
17 * Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 *
19 */
20#ifndef ORES_DQ_API_DOMAIN_DATASET_HPP
21#define ORES_DQ_API_DOMAIN_DATASET_HPP
22
23#include <chrono>
24#include <optional>
25#include <string>
26#include <boost/uuid/uuid.hpp>
27#include "ores.utility/uuid/tenant_id.hpp"
28
29namespace ores::dq::domain {
30
37struct dataset final {
41 int version = 0;
42
47
53 boost::uuids::uuid id;
54
61 std::string code;
62
68 std::optional<std::string> catalog_name;
69
75 std::string subject_area_name;
76
82 std::string domain_name;
83
89 std::optional<std::string> coding_scheme_code;
90
96 std::string origin_code;
97
103 std::string nature_code;
104
110 std::string treatment_code;
111
117 std::optional<boost::uuids::uuid> methodology_id;
118
122 std::string name;
123
127 std::string description;
128
132 std::string source_system_id;
133
137 std::string business_context;
138
144 std::optional<boost::uuids::uuid> upstream_derivation_id;
145
152
158 std::chrono::system_clock::time_point as_of_date;
159
163 std::chrono::system_clock::time_point ingestion_timestamp;
164
168 std::optional<std::string> license_info;
169
176 std::optional<std::string> artefact_type;
177
181 std::string modified_by;
182
186 std::string change_commentary;
187
191 std::string performed_by;
192
196 std::chrono::system_clock::time_point recorded_at;
197};
198
199}
200
201#endif
Represents a data quality dataset with lineage tracking.
Definition dataset.hpp:37
std::string domain_name
Data domain this dataset applies to.
Definition dataset.hpp:82
std::string modified_by
Username of the person who last modified this dataset.
Definition dataset.hpp:181
std::optional< std::string > catalog_name
Optional catalog this dataset belongs to.
Definition dataset.hpp:68
int lineage_depth
Depth in the derivation chain from the original source.
Definition dataset.hpp:151
std::string change_commentary
Free-text commentary explaining the change.
Definition dataset.hpp:186
std::string description
Detailed description of the dataset's contents and purpose.
Definition dataset.hpp:127
std::string code
Unique code for stable referencing.
Definition dataset.hpp:61
std::optional< boost::uuids::uuid > methodology_id
Optional methodology used to produce this dataset.
Definition dataset.hpp:117
std::string origin_code
Code indicating the origin of the data.
Definition dataset.hpp:96
std::string source_system_id
Identifier of the source system where data originated.
Definition dataset.hpp:132
std::chrono::system_clock::time_point as_of_date
Business date the data represents.
Definition dataset.hpp:158
std::chrono::system_clock::time_point recorded_at
Timestamp when this version of the record was recorded.
Definition dataset.hpp:196
std::string nature_code
Code indicating the nature of the data.
Definition dataset.hpp:103
std::optional< std::string > coding_scheme_code
Optional coding scheme used for identifiers in this dataset.
Definition dataset.hpp:89
std::string name
Human-readable name for the dataset.
Definition dataset.hpp:122
boost::uuids::uuid id
UUID uniquely identifying this dataset.
Definition dataset.hpp:53
std::string business_context
Business context describing the dataset's role and usage.
Definition dataset.hpp:137
int version
Version number for optimistic locking and change tracking.
Definition dataset.hpp:41
std::string subject_area_name
Subject area this dataset belongs to.
Definition dataset.hpp:75
std::string treatment_code
Code indicating how the data was treated or processed.
Definition dataset.hpp:110
std::string performed_by
Username of the account that performed this operation.
Definition dataset.hpp:191
std::optional< std::string > artefact_type
Type of artefact this dataset populates.
Definition dataset.hpp:176
std::optional< std::string > license_info
Optional license information for the data.
Definition dataset.hpp:168
std::chrono::system_clock::time_point ingestion_timestamp
Timestamp when the data was ingested into the system.
Definition dataset.hpp:163
utility::uuid::tenant_id tenant_id
Tenant identifier for multi-tenancy isolation.
Definition dataset.hpp:46
std::optional< boost::uuids::uuid > upstream_derivation_id
Optional reference to an upstream dataset this was derived from.
Definition dataset.hpp:144
A strongly-typed wrapper around a UUID representing a tenant identifier.
Definition tenant_id.hpp:66
static tenant_id system()
Creates a tenant_id representing the system tenant.
Definition tenant_id.cpp:41