-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmmap_reader.cpp
More file actions
114 lines (99 loc) · 2.94 KB
/
Copy pathmmap_reader.cpp
File metadata and controls
114 lines (99 loc) · 2.94 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
#include "mmap_reader.h"
#include <fcntl.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <unistd.h>
#include <cstring>
#include <stdexcept>
#include <system_error>
#include <utility>
namespace {

/// Throws std::system_error describing the calling thread's current errno.
///
/// Must be called right after the failing call, before anything else that may
/// modify errno runs.
///
/// @param what  Context string passed to std::system_error as its `what`
///              argument (e.g. "open(/some/path)").
/// @throws std::system_error  always; the error code is errno in
///         std::generic_category().
[[noreturn]] void throw_errno(const std::string& what) {
  // Snapshot errno first so the value reported is exactly the one observed on
  // entry, regardless of what constructing the exception does.
  const int err = errno;
  throw std::system_error(err, std::generic_category(), what);
}

}  // namespace
/// Opens `path` read-only and maps the whole file into memory.
///
/// An empty file is not mapped (mmap() rejects a zero length): data_ is set to
/// nullptr, size_ is 0, and the descriptor is left open in fd_.
///
/// @param path  File to open and map.
/// @throws std::system_error  with the errno of the failing open(), fstat() or
///         mmap() call. On fstat()/mmap() failure the descriptor is closed
///         before the exception is thrown.
MmapReader::MmapReader(const std::string& path) {
  fd_ = ::open(path.c_str(), O_RDONLY);
  if (fd_ < 0) {
    // Building the message allocates, which is allowed to clobber errno.
    // Pin the value from open() and restore it before throw_errno() reads it.
    const int open_errno = errno;
    const std::string what = "open(" + path + ")";
    errno = open_errno;
    throw_errno(what);
  }

  struct stat st;
  if (::fstat(fd_, &st) != 0) {
    const int e = errno;  // close() below may overwrite errno
    ::close(fd_);
    fd_ = -1;
    throw std::system_error(e, std::generic_category(), "fstat(" + path + ")");
  }

  size_ = static_cast<std::size_t>(st.st_size);
  if (size_ == 0) {
    // mmap() rejects a zero length; an empty file simply yields no chunks.
    data_ = nullptr;
    return;
  }

  int flags = MAP_PRIVATE;
#ifdef MAP_POPULATE
  // Linux: prefault all pages now so processing doesn't stall on minor faults.
  flags |= MAP_POPULATE;
#endif

  void* p = ::mmap(nullptr, size_, PROT_READ, flags, fd_, 0);
  if (p == MAP_FAILED) {
    const int e = errno;  // close() below may overwrite errno
    ::close(fd_);
    fd_ = -1;
    throw std::system_error(e, std::generic_category(), "mmap(" + path + ")");
  }
  data_ = static_cast<const char*>(p);

  // We scan front-to-back exactly once: ask the kernel for aggressive readahead
  // and to drop pages behind us, which keeps a >RAM file from thrashing.
  //
  // madvise() advice values are distinct integer constants, not bit flags, so
  // they must not be OR-ed together: on Linux MADV_SEQUENTIAL | MADV_WILLNEED
  // evaluates to MADV_WILLNEED alone and the sequential hint is lost. Issue one
  // call per hint. Both are best-effort, so failures are deliberately ignored.
  (void)::madvise(p, size_, MADV_SEQUENTIAL);
  (void)::madvise(p, size_, MADV_WILLNEED);
}
/// Releases the mapping and the file descriptor, if any, and returns the
/// object to the empty state (fd_ == -1, data_ == nullptr, size_ == 0).
///
/// Return values of munmap() and close() are ignored. Calling this on an
/// already-empty object only rewrites the empty-state values.
void MmapReader::reset() noexcept {
  // Unmap first, then close: same order as acquisition in reverse.
  const bool has_mapping = (size_ != 0) && (data_ != nullptr);
  if (has_mapping) {
    ::munmap(const_cast<char*>(data_), size_);
  }
  data_ = nullptr;
  size_ = 0;

  const bool has_descriptor = (fd_ >= 0);
  if (has_descriptor) {
    ::close(fd_);
  }
  fd_ = -1;
}
/// Destructor: hands all cleanup to reset(), which unmaps the file and closes
/// the descriptor when they are held.
MmapReader::~MmapReader() {
  reset();
}
/// Move constructor: takes over `other`'s descriptor and mapping.
///
/// Afterwards `other` is in the empty state (fd_ == -1, data_ == nullptr,
/// size_ == 0), so its destructor releases nothing.
MmapReader::MmapReader(MmapReader&& other) noexcept
    : fd_(-1), data_(nullptr), size_(0) {
  // Start empty, then trade state with the source: this object ends up with
  // the source's resources and the source ends up with the empty values.
  std::swap(fd_, other.fd_);
  std::swap(data_, other.data_);
  std::swap(size_, other.size_);
}
/// Move assignment: releases whatever this object currently holds, then takes
/// over `other`'s descriptor and mapping, leaving `other` in the empty state
/// (fd_ == -1, data_ == nullptr, size_ == 0).
///
/// Self-assignment is a no-op.
///
/// @return *this
MmapReader& MmapReader::operator=(MmapReader&& other) noexcept {
  if (this == &other) {
    return *this;  // self-move: keep the current mapping untouched
  }

  // Drop our own mapping/descriptor before adopting the source's.
  reset();

  size_ = other.size_;
  data_ = other.data_;
  fd_ = other.fd_;

  // Disarm the source so its destructor does not release what we now own.
  other.size_ = 0;
  other.data_ = nullptr;
  other.fd_ = -1;

  return *this;
}
/// Splits the mapped bytes into consecutive, newline-aligned chunks.
///
/// Chunks are emitted in order, do not overlap, and together cover
/// [data_, data_ + size_). Each chunk starts as `target_size` bytes and is
/// extended to just past the first '\n' found at or after that offset, so a
/// chunk is never cut in the middle of a line. The final chunk runs to the end
/// of the data and need not end with '\n'.
///
/// The returned pointers refer into the mapping, which reset() unmaps.
///
/// @param target_size  Minimum chunk length in bytes; chunks may be longer
///                     because of the newline extension.
/// @return One Chunk{begin, end} per chunk; empty when there are no bytes
///         (size_ == 0) or when target_size is 0.
std::vector<Chunk> MmapReader::chunks(std::size_t target_size) const {
  std::vector<Chunk> result;
  if (size_ == 0 || target_size == 0) return result;

  const char* const end = data_ + size_;
  result.reserve(size_ / target_size + 1);

  const char* cur = data_;
  while (cur < end) {
    const std::size_t remaining = static_cast<std::size_t>(end - cur);

    const char* chunk_end = end;  // default: last chunk runs to EOF
    // Compare sizes *before* doing pointer arithmetic: forming
    // cur + target_size when target_size > remaining would point beyond
    // one-past-the-end (undefined behaviour, and it can wrap for huge values).
    if (target_size < remaining) {
      // Extend to just past the next newline so we never split a record.
      const char* const probe = cur + target_size;
      const void* const nl = ::memchr(probe, '\n', remaining - target_size);
      if (nl != nullptr) {
        chunk_end = static_cast<const char*>(nl) + 1;
      }
    }

    result.push_back(Chunk{cur, chunk_end});
    cur = chunk_end;
  }
  return result;
}