ORE Studio 0.0.4
Loading...
Searching...
No Matches
publication_service.hpp
1/* -*- mode: c++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*-
2 *
3 * Copyright (C) 2025 Marco Craveiro <marco.craveiro@gmail.com>
4 *
5 * This program is free software; you can redistribute it and/or modify it under
6 * the terms of the GNU General Public License as published by the Free Software
7 * Foundation; either version 3 of the License, or (at your option) any later
8 * version.
9 *
10 * This program is distributed in the hope that it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12 * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
13 * details.
14 *
15 * You should have received a copy of the GNU General Public License along with
16 * this program; if not, write to the Free Software Foundation, Inc., 51
17 * Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 *
19 */
20#ifndef ORES_DQ_CORE_SERVICE_PUBLICATION_SERVICE_HPP
21#define ORES_DQ_CORE_SERVICE_PUBLICATION_SERVICE_HPP
22
23#include <map>
24#include <string>
25#include <vector>
26#include <boost/uuid/uuid.hpp>
27#include "ores.logging/make_logger.hpp"
28#include "ores.database/domain/context.hpp"
29#include "ores.dq.api/domain/artefact_type.hpp"
30#include "ores.dq.api/domain/dataset.hpp"
31#include "ores.dq.api/domain/publication.hpp"
32#include "ores.dq.api/domain/publication_mode.hpp"
33#include "ores.dq.api/domain/publication_result.hpp"
34#include "ores.dq.core/repository/dataset_repository.hpp"
35#include "ores.dq.core/repository/dataset_dependency_repository.hpp"
36#include "ores.dq.core/repository/publication_repository.hpp"
37#include "ores.dq.core/repository/artefact_type_repository.hpp"
38
39namespace ores::dq::service {
40
/**
 * @brief Outcome recorded for one dataset of a bundle publication.
 *
 * Plain aggregate: two identifying strings, a status string, an error
 * message and four record counters. All counters start at zero and all
 * strings start empty.
 */
struct bundle_dataset_result {
    /// Code identifying the dataset.
    std::string dataset_code;
    /// Name of the dataset.
    std::string dataset_name;
    /// Status text for this dataset; the set of values is not defined here.
    std::string status;
    /// Error text associated with this dataset, if any.
    std::string error_message;
    /// Number of records inserted.
    std::uint64_t records_inserted = 0;
    /// Number of records updated.
    std::uint64_t records_updated = 0;
    /// Number of records skipped.
    std::uint64_t records_skipped = 0;
    /// Number of records deleted.
    std::uint64_t records_deleted = 0;
};
51
52struct publish_bundle_result {
53 bool success = false;
54 std::string error_message;
55 std::uint32_t datasets_processed = 0;
56 std::uint32_t datasets_succeeded = 0;
57 std::uint32_t datasets_failed = 0;
58 std::uint32_t datasets_skipped = 0;
59 std::uint64_t total_records_inserted = 0;
60 std::uint64_t total_records_updated = 0;
61 std::uint64_t total_records_skipped = 0;
62 std::uint64_t total_records_deleted = 0;
63 std::vector<bundle_dataset_result> dataset_results;
64};
65
79private:
/// Name handed to make_logger() by lg(). Declared constexpr so the shared
/// static cannot be reassigned at run time.
inline static constexpr std::string_view logger_name =
    "ores.dq.service.publication_service";
82
83 [[nodiscard]] static auto& lg() {
84 using namespace ores::logging;
85 static auto instance = make_logger(logger_name);
86 return instance;
87 }
88
89public:
91
97 explicit publication_service(context ctx);
98
114 std::vector<domain::publication_result> publish(
115 const std::vector<boost::uuids::uuid>& dataset_ids,
116 domain::publication_mode mode,
117 const std::string& published_by,
118 bool resolve_dependencies = true);
119
135 publish_bundle_result publish_bundle(
136 const std::string& bundle_code,
137 domain::publication_mode mode,
138 const std::string& published_by,
139 bool atomic = true,
140 const std::string& params_json = "");
141
152 std::vector<domain::dataset> resolve_publication_order(
153 const std::vector<boost::uuids::uuid>& dataset_ids);
154
161 std::vector<domain::publication> get_publication_history(
162 const boost::uuids::uuid& dataset_id);
163
170 std::vector<domain::publication> get_recent_publications(
171 std::uint32_t limit = 100);
172
173private:
180 std::map<std::string, domain::artefact_type> build_artefact_type_cache(
181 const std::vector<domain::dataset>& datasets);
182
194 domain::publication_result publish_dataset(
195 const domain::dataset& dataset,
196 domain::publication_mode mode,
197 const std::string& published_by,
198 const std::map<std::string, domain::artefact_type>& artefact_type_cache);
199
207 void record_publication(
208 const domain::publication_result& result,
209 domain::publication_mode mode,
210 const std::string& published_by);
211
220 domain::publication_result call_populate_function(
221 const domain::dataset& dataset,
222 const domain::artefact_type& artefact_type,
223 domain::publication_mode mode,
224 const std::string& published_by);
225
226 context ctx_;
227 repository::dataset_repository dataset_repo_;
229 repository::publication_repository publication_repo_;
230 repository::artefact_type_repository artefact_type_repo_;
231};
232
233}
234
235#endif
Implements logging infrastructure for ORE Studio.
Definition boost_severity.hpp:28
Context for the operations on a postgres database.
Definition context.hpp:47
Maps artefact type codes to their population functions and tables.
Definition artefact_type.hpp:39
Represents a data quality dataset with lineage tracking.
Definition dataset.hpp:37
Result of publishing a single dataset to a target table.
Definition publication_result.hpp:35
Reads artefact_types from data storage.
Definition artefact_type_repository.hpp:39
Reads dataset dependencies from data storage.
Definition dataset_dependency_repository.hpp:35
Reads and writes datasets to data storage.
Definition dataset_repository.hpp:36
Repository for reading and writing publication audit records.
Definition publication_repository.hpp:37
Service for publishing datasets to production tables.
Definition publication_service.hpp:78
publish_bundle_result publish_bundle(const std::string &bundle_code, domain::publication_mode mode, const std::string &published_by, bool atomic=true, const std::string &params_json="")
Publishes all datasets in a bundle.
Definition publication_service.cpp:426
std::vector< domain::publication > get_publication_history(const boost::uuids::uuid &dataset_id)
Gets the publication history for a dataset.
Definition publication_service.cpp:229
std::vector< domain::publication > get_recent_publications(std::uint32_t limit=100)
Gets recent publication history across all datasets.
Definition publication_service.cpp:238
std::vector< domain::publication_result > publish(const std::vector< boost::uuids::uuid > &dataset_ids, domain::publication_mode mode, const std::string &published_by, bool resolve_dependencies=true)
Publishes one or more datasets to production tables.
Definition publication_service.cpp:45
std::vector< domain::dataset > resolve_publication_order(const std::vector< boost::uuids::uuid > &dataset_ids)
Resolves the publication order for datasets.
Definition publication_service.cpp:117