Skip to content

Commit

Permalink
refactor: factor out metadata_builder from scanner
Browse files Browse the repository at this point in the history
  • Loading branch information
mhx committed Nov 27, 2024
1 parent e121b04 commit 4f20bb9
Show file tree
Hide file tree
Showing 11 changed files with 653 additions and 271 deletions.
1 change: 1 addition & 0 deletions cmake/libdwarfs.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,7 @@ add_library(
src/writer/internal/inode_element_view.cpp
src/writer/internal/inode_manager.cpp
src/writer/internal/inode_ordering.cpp
src/writer/internal/metadata_builder.cpp
src/writer/internal/metadata_freezer.cpp
src/writer/internal/nilsimsa.cpp
src/writer/internal/progress.cpp
Expand Down
8 changes: 4 additions & 4 deletions include/dwarfs/writer/internal/global_entry_data.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@

namespace dwarfs {

struct scanner_options;
struct metadata_options;

namespace writer::internal {

Expand All @@ -44,8 +44,8 @@ class global_entry_data {

enum class timestamp_type { ATIME, MTIME, CTIME };

global_entry_data(scanner_options const& options)
: options_(options) {}
global_entry_data(metadata_options const& options)
: options_{options} {}

void add_uid(uid_type uid);
void add_gid(gid_type gid);
Expand Down Expand Up @@ -111,7 +111,7 @@ class global_entry_data {
gid_type next_gid_index_{0};
mode_type next_mode_index_{0};
uint64_t timestamp_base_{std::numeric_limits<uint64_t>::max()};
scanner_options const& options_;
metadata_options const& options_;
};

} // namespace writer::internal
Expand Down
141 changes: 141 additions & 0 deletions include/dwarfs/writer/internal/metadata_builder.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
/* vim:set ts=2 sw=2 sts=2 et: */
/**
* \author Marcus Holland-Moritz (github@mhxnet.de)
* \copyright Copyright (c) Marcus Holland-Moritz
*
* This file is part of dwarfs.
*
* dwarfs is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* dwarfs is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with dwarfs. If not, see <https://www.gnu.org/licenses/>.
*/

#pragma once

#include <cstddef>
#include <cstdint>
#include <memory>
#include <span>
#include <string>
#include <utility>
#include <vector>

namespace dwarfs {

class logger;

namespace writer {
struct metadata_options;
}

namespace thrift::metadata {
class metadata;
} // namespace thrift::metadata

namespace writer::internal {

class global_entry_data;
class inode_manager;
class block_manager;
class dir;

/**
 * Facade over a polymorphic metadata builder implementation.
 *
 * Every public member function below forwards its arguments unchanged to
 * the `impl` object owned through `impl_`. The constructors and the
 * destructor are only declared here; their definitions live out of line.
 */
class metadata_builder {
 public:
  /// Construct a builder from a logger and the metadata options.
  metadata_builder(logger& lgr, metadata_options const& options);

  /// Construct a builder from an existing metadata object (taken by const
  /// reference) plus the metadata options.
  metadata_builder(logger& lgr, thrift::metadata::metadata const& md,
                   metadata_options const& options);

  /// Same as above, but the existing metadata object is taken by rvalue
  /// reference.
  metadata_builder(logger& lgr, thrift::metadata::metadata&& md,
                   metadata_options const& options);

  ~metadata_builder();

  // --- simple setters: by-value arguments are moved into the impl ---------

  void set_devices(std::vector<uint64_t> devices) {
    impl_->set_devices(std::move(devices));
  }

  void set_symlink_table_size(size_t size) {
    impl_->set_symlink_table_size(size);
  }

  void set_block_size(uint32_t block_size) {
    impl_->set_block_size(block_size);
  }

  void set_total_fs_size(uint64_t total_fs_size) {
    impl_->set_total_fs_size(total_fs_size);
  }

  void set_total_hardlink_size(uint64_t total_hardlink_size) {
    impl_->set_total_hardlink_size(total_hardlink_size);
  }

  void set_shared_files_table(std::vector<uint32_t> shared_files) {
    impl_->set_shared_files_table(std::move(shared_files));
  }

  void set_category_names(std::vector<std::string> category_names) {
    impl_->set_category_names(std::move(category_names));
  }

  void set_block_categories(std::vector<uint32_t> block_categories) {
    impl_->set_block_categories(std::move(block_categories));
  }

  // NOTE(review): presumably `index` must be below the size passed to
  // set_symlink_table_size() -- confirm against the implementation.
  void add_symlink_table_entry(size_t index, uint32_t entry) {
    impl_->add_symlink_table_entry(index, entry);
  }

  // --- gather steps: forward the collaborating objects to the impl --------

  void gather_chunks(inode_manager const& im, block_manager const& bm,
                     size_t chunk_count) {
    impl_->gather_chunks(im, bm, chunk_count);
  }

  void gather_entries(std::span<dir*> dirs, global_entry_data const& ge_data,
                      uint32_t num_inodes) {
    impl_->gather_entries(dirs, ge_data, num_inodes);
  }

  // Plain call rather than `return <void expression>;`, so this forwarder
  // reads like all the other void forwarders in this class.
  void gather_global_entry_data(global_entry_data const& ge_data) {
    impl_->gather_global_entry_data(ge_data);
  }

  /// Forwards to the implementation's build() and returns its result, a
  /// const reference to the metadata object.
  thrift::metadata::metadata const& build() { return impl_->build(); }

  /**
   * Abstract interface that the facade forwards to.
   *
   * It declares one pure virtual function per public member function of
   * metadata_builder, with identical parameters and return types.
   */
  class impl {
   public:
    virtual ~impl() = default;

    virtual void set_devices(std::vector<uint64_t> devices) = 0;
    virtual void set_symlink_table_size(size_t size) = 0;
    virtual void set_block_size(uint32_t block_size) = 0;
    virtual void set_total_fs_size(uint64_t total_fs_size) = 0;
    virtual void set_total_hardlink_size(uint64_t total_hardlink_size) = 0;
    virtual void set_shared_files_table(std::vector<uint32_t> shared_files) = 0;
    virtual void
    set_category_names(std::vector<std::string> category_names) = 0;
    virtual void
    set_block_categories(std::vector<uint32_t> block_categories) = 0;
    virtual void add_symlink_table_entry(size_t index, uint32_t entry) = 0;
    virtual void gather_chunks(inode_manager const& im, block_manager const& bm,
                               size_t chunk_count) = 0;
    virtual void
    gather_entries(std::span<dir*> dirs, global_entry_data const& ge_data,
                   uint32_t num_inodes) = 0;
    virtual void gather_global_entry_data(global_entry_data const& ge_data) = 0;

    virtual thrift::metadata::metadata const& build() = 0;
  };

 private:
  // Sole owner of the implementation that all forwarders dispatch to.
  std::unique_ptr<impl> impl_;
};

} // namespace writer::internal

} // namespace dwarfs
59 changes: 59 additions & 0 deletions include/dwarfs/writer/metadata_options.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
/* vim:set ts=2 sw=2 sts=2 et: */
/**
* \author Marcus Holland-Moritz (github@mhxnet.de)
* \copyright Copyright (c) Marcus Holland-Moritz
*
* This file is part of dwarfs.
*
* dwarfs is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* dwarfs is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with dwarfs. If not, see <https://www.gnu.org/licenses/>.
*/

#pragma once

#include <cstddef>
#include <cstdint>
#include <functional>
#include <optional>
#include <string>
#include <vector>

#include <dwarfs/file_stat.h>
#include <dwarfs/history_config.h>
#include <dwarfs/writer/inode_options.h>

namespace dwarfs::writer {

class entry_interface;

struct metadata_options {
std::optional<file_stat::uid_type> uid{};
std::optional<file_stat::gid_type> gid{};
std::optional<uint64_t> timestamp{};
bool keep_all_times{false};
uint32_t time_resolution_sec{1};
bool pack_chunk_table{false};
bool pack_directories{false};
bool pack_shared_files_table{false};
bool plain_names_table{false};
bool pack_names{false};
bool pack_names_index{false};
bool plain_symlinks_table{false};
bool pack_symlinks{false};
bool pack_symlinks_index{false};
bool force_pack_string_tables{false};
bool no_create_timestamp{false};
size_t inode_size_cache_min_chunk_count{128};
};

} // namespace dwarfs::writer
19 changes: 2 additions & 17 deletions include/dwarfs/writer/scanner_options.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,40 +31,25 @@
#include <dwarfs/file_stat.h>
#include <dwarfs/history_config.h>
#include <dwarfs/writer/inode_options.h>
#include <dwarfs/writer/metadata_options.h>

namespace dwarfs::writer {

class entry_interface;

struct scanner_options {
std::optional<std::string> file_hash_algorithm{"xxh3-128"};
std::optional<file_stat::uid_type> uid;
std::optional<file_stat::gid_type> gid;
std::optional<uint64_t> timestamp;
bool keep_all_times{false};
bool remove_empty_dirs{false};
bool with_devices{false};
bool with_specials{false};
uint32_t time_resolution_sec{1};
inode_options inode;
bool pack_chunk_table{false};
bool pack_directories{false};
bool pack_shared_files_table{false};
bool plain_names_table{false};
bool pack_names{false};
bool pack_names_index{false};
bool plain_symlinks_table{false};
bool pack_symlinks{false};
bool pack_symlinks_index{false};
bool force_pack_string_tables{false};
bool no_create_timestamp{false};
std::optional<std::function<void(bool, writer::entry_interface const&)>>
debug_filter_function;
size_t num_segmenter_workers{1};
bool enable_history{true};
std::optional<std::vector<std::string>> command_line_arguments;
history_config history;
size_t inode_size_cache_min_chunk_count{128};
metadata_options metadata;
};

} // namespace dwarfs::writer
6 changes: 2 additions & 4 deletions src/writer/internal/global_entry_data.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,13 +38,11 @@ std::vector<T> global_entry_data::get_vector(map_type<T, U> const& map) const {
}

auto global_entry_data::get_uids() const -> std::vector<uid_type> {
return options_.uid ? std::vector<uid_type>{*options_.uid}
: get_vector(uids_);
return get_vector(uids_);
}

auto global_entry_data::get_gids() const -> std::vector<gid_type> {
return options_.gid ? std::vector<gid_type>{*options_.gid}
: get_vector(gids_);
return get_vector(gids_);
}

auto global_entry_data::get_modes() const -> std::vector<mode_type> {
Expand Down
Loading

0 comments on commit 4f20bb9

Please sign in to comment.