OpenGothic
Open source reimplementation of Gothic I and II
Loading...
Searching...
No Matches
instancestorage.cpp
Go to the documentation of this file.
1#include "instancestorage.h"
2#include "shaders.h"
3#include "utils/workers.h"
4
5#include <Tempest/Log>
6#include <cstdint>
7#include <atomic>
8
9using namespace Tempest;
10
// Round `v` up to the next power of two. Values that already are a power of
// two are returned unchanged; nextPot(0) yields 0 (wrap-around of v-1).
static uint32_t nextPot(uint32_t v) {
  uint32_t r = v - 1u;
  for(uint32_t shift=1; shift<32; shift*=2)
    r |= r >> shift;
  return r + 1u;
  }
21
// Round `sz` up to the nearest multiple of `alignment` (alignment must be >0).
static uint32_t alignAs(uint32_t sz, uint32_t alignment) {
  const uint32_t rem = sz % alignment;
  return rem==0 ? sz : sz + (alignment - rem);
  }
25
// Atomically set bit `id` in a packed bitset stored as 32-bit words.
// The element is accessed through std::atomic so concurrent writers
// (e.g. parallel Id::set calls) do not lose each other's updates.
static void bitSet(std::vector<uint32_t>& b, size_t id) {
  static_assert(sizeof(std::atomic<uint32_t>)==sizeof(uint32_t));
  const uint32_t mask = 1u << (id%32u);
  auto*          word = reinterpret_cast<std::atomic<uint32_t>*>(&b[id/32u]);
  word->fetch_or(mask, std::memory_order_relaxed);
  }
32
// Non-atomic read of bit `id` from the packed bitset; used only on the
// thread that owns the buffer during commit().
static bool bitAt(std::vector<uint32_t>& b, size_t id) {
  const uint32_t word = b[id/32u];
  return (word >> (id%32u)) & 1u;
  }
38
39using namespace Tempest;
40
41InstanceStorage::Id::Id(Id&& other) noexcept
42 :owner(other.owner), rgn(other.rgn) {
43 other.owner = nullptr;
44 }
45
47 std::swap(owner, other.owner);
48 std::swap(rgn, other.rgn);
49 return *this;
50 }
51
53 if(owner!=nullptr)
54 owner->free(rgn);
55 }
56
// Overwrite the whole region with `rgn.asize` bytes of matrices and mark all
// touched upload blocks dirty so commit() patches them to the GPU.
void InstanceStorage::Id::set(const Tempest::Matrix4x4* mat) {
  if(owner==nullptr)
    return;

  auto data = reinterpret_cast<Matrix4x4*>(owner->dataCpu.data() + rgn.begin);
  std::memcpy(data, mat, rgn.asize);

  // Mark one block per blockSz-stride of the written span.
  // NOTE(review): if rgn.begin is not a multiple of blockSz, the block that
  // holds the tail bytes may not be marked — presumably alloc() aligns
  // regions to blockSz; verify against the header's alignment constant.
  for(size_t i=0; i<rgn.asize; i+=blockSz)
    bitSet(owner->durty, (rgn.begin+i)/blockSz);
  }
67
68void InstanceStorage::Id::set(const Tempest::Matrix4x4& obj, size_t offset) {
69 if(owner==nullptr)
70 return;
71
72 auto data = reinterpret_cast<Matrix4x4*>(owner->dataCpu.data() + rgn.begin);
73 if(data[offset] == obj)
74 return;
75 data[offset] = obj;
76 bitSet(owner->durty, (rgn.begin+offset*sizeof(Matrix4x4))/blockSz);
77 }
78
79void InstanceStorage::Id::set(const void* data, size_t offset, size_t size) {
80 if(owner==nullptr)
81 return;
82
83 auto src = reinterpret_cast<const uint8_t*>(data);
84 auto dst = (owner->dataCpu.data() + rgn.begin + offset);
85
86 if(std::memcmp(src, dst, size)==0)
87 return;
88
89 if((offset%blockSz)==0) {
90 for(size_t i=0; i<size; i+=blockSz) {
91 const size_t sz = std::min(blockSz, size-i);
92 std::memcpy(dst+i, src+i, sz);
93 bitSet(owner->durty, (rgn.begin + offset + i)/blockSz);
94 }
95 } else {
96 for(size_t i=0; i<size; ++i) {
97 dst[i] = src[i];
98 bitSet(owner->durty, (rgn.begin + offset + i)/blockSz);
99 }
100 }
101 }
102
103
105 dataCpu.reserve(131072);
106 dataCpu.resize(sizeof(Matrix4x4)); // also avoid null-ssbo
107 reinterpret_cast<Matrix4x4*>(dataCpu.data())->identity();
108
109 patchCpu.reserve(4*1024*1024);
110 patchBlock.reserve(16*1024);
111
112 uploadTh = std::thread([this](){ uploadMain(); });
113 }
114
116 {
117 std::unique_lock<std::mutex> lck(sync);
119 }
120 uploadCnd.notify_one();
121 uploadTh.join();
122 }
123
// Flush CPU-side instance data to the GPU for frame `fId`.
// Returns true when the whole SSBO was recreated (callers must rebind),
// false when only incremental patches (or nothing) were recorded.
bool InstanceStorage::commit(Encoder<CommandBuffer>& cmd, uint8_t fId) {
  auto& device = Resources::device();

  // Pair with the relaxed fetch_or writes in bitSet(); then wait until the
  // upload thread has finished consuming the previous frame's patch data.
  std::atomic_thread_fence(std::memory_order_acquire);
  join();

  // GPU buffer size is dataCpu rounded up to 4 KiB; any mismatch forces a
  // full re-upload and clears all dirty bits.
  const size_t dataSize = (dataCpu.size() + 0xFFF) & ~size_t(0xFFF);
  if(dataGpu.byteSize()!=dataSize) {
    Resources::recycle(std::move(dataGpu));
    dataGpu = device.ssbo(BufferHeap::Device,Tempest::Uninitialized,dataSize);
    dataGpu.update(dataCpu);
    std::memset(durty.data(), 0, durty.size()*sizeof(uint32_t));
    return true;
    }

  // Gather runs of consecutive dirty blocks into patch commands.
  patchBlock.clear();
  size_t payloadSize = 0;
  for(size_t i = 0; i<blockCnt; ++i) {
    // Fast path: a fully-clean 32-block word is skipped in one step.
    if(i%32==0 && durty[i/32]==0) {
      i+=31;
      continue;
      }
    if(!bitAt(durty,i))
      continue;
    // Extend the run while blocks stay dirty.
    auto begin = i; ++i;
    while(i<blockCnt) {
      if(!bitAt(durty,i))
        break;
      ++i;
      }

    uint32_t size = uint32_t((i-begin)*blockSz);
    uint32_t chunkSz = 256;

    // Split the run into chunks of at most 256 bytes — one Path entry per
    // chunk, matching one workgroup of the patch compute shader.
    Path p = {};
    p.dst = uint32_t(begin*blockSz);
    p.src = uint32_t(payloadSize);
    while(size>0) {
      p.size = std::min<uint32_t>(size, chunkSz);
      size -= p.size;
      patchBlock.push_back(p);

      payloadSize += p.size;
      p.dst += p.size;
      p.src += p.size;
      }
    }
  std::memset(durty.data(), 0, durty.size()*sizeof(durty[0]));

  if(patchBlock.size()==0)
    return false;

  // Staging layout: [Path header table][payload bytes]; src offsets are
  // rebased past the header, then everything is converted to uint units
  // because the compute shader addresses the buffer as uints.
  const size_t headerSize = patchBlock.size()*sizeof(Path);
  patchCpu.resize(headerSize + payloadSize);
  for(auto& i:patchBlock) {
    i.src += uint32_t(headerSize);
    std::memcpy(patchCpu.data()+i.src, dataCpu.data() + i.dst, i.size);

    // uint's in shader
    i.src /= 4;
    i.dst /= 4;
    i.size /= 4;
    }
  std::memcpy(patchCpu.data(), patchBlock.data(), headerSize);

  // Grow the per-frame upload buffer if needed (never shrinks).
  auto& path = patchGpu[fId];
  if(path.byteSize() < headerSize + payloadSize) {
    path = device.ssbo(BufferHeap::Upload, Uninitialized, headerSize + payloadSize);
    }

  // Hand patchCpu over to the upload thread, which copies it into patchGpu[fId]
  // asynchronously; the next commit()'s join() guarantees it finished.
  {
    std::unique_lock<std::mutex> lck(sync);
    uploadFId = fId;
    }
  uploadCnd.notify_one();
  //path.update(patchCpu);

  // Record the compute dispatch that applies the patches to dataGpu;
  // one workgroup per Path entry.
  cmd.setFramebuffer({});
  cmd.setBinding(0, dataGpu);
  cmd.setBinding(1, path);
  cmd.setPipeline(Shaders::inst().patch);
  cmd.dispatch(patchBlock.size());
  return false;
  }
208
210 while(true) {
211 std::unique_lock<std::mutex> lck(sync);
212 if(uploadFId<0)
213 break;
214 }
215 }
216
218 if(size==0)
219 return Id(*this,Range());
220
221 const auto nsize = alignAs(nextPot(uint32_t(size)), alignment);
222 for(size_t i=0; i<rgn.size(); ++i) {
223 if(rgn[i].size==nsize) {
224 auto ret = rgn[i];
225 ret.asize = size;
226 rgn.erase(rgn.begin()+intptr_t(i));
227 return Id(*this,ret);
228 }
229 }
230 size_t retId = size_t(-1);
231 for(size_t i=0; i<rgn.size(); ++i) {
232 if(rgn[i].size>nsize && (retId==size_t(-1) || rgn[i].size<rgn[retId].size)) {
233 retId = i;
234 }
235 }
236 if(retId!=size_t(-1)) {
237 Range ret = rgn[retId];
238 ret.size = nsize;
239 ret.asize = size;
240 rgn[retId].begin += nsize;
241 rgn[retId].size -= nsize;
242 return Id(*this,ret);
243 }
244 Range r;
245 r.begin = dataCpu.size();
246 r.size = nsize;
247 r.asize = size;
248
249 dataCpu.resize(dataCpu.size() + nsize);
250
251 blockCnt = (dataCpu.size()+blockSz-1)/blockSz;
252 durty.resize((blockCnt+32-1)/32, 0);
253 return Id(*this,r);
254 }
255
256bool InstanceStorage::realloc(Id& id, const size_t size) {
257 if(size==0) {
258 if(id.isEmpty())
259 return false;
260 id = Id(*this,Range());
261 return true;
262 }
263
264 if(size<=id.rgn.size) {
265 id.rgn.asize = size;
266 return false;
267 }
268
269 auto next = alloc(size);
270 if(id.isEmpty()) {
271 id = std::move(next);
272 return true;
273 }
274
275 auto data = dataCpu.data();
276 std::memcpy(data+next.rgn.begin, data+id.rgn.begin, id.rgn.asize);
277 for(size_t i=0; i<id.rgn.asize; ++i) {
278 bitSet(durty, (next.rgn.begin + i)/blockSz);
279 }
280 id = std::move(next);
281 return true;
282 }
283
// Device-local buffer holding all instance data; the reference is
// invalidated whenever commit() recreates the buffer on resize.
const Tempest::StorageBuffer& InstanceStorage::ssbo() const {
  return dataGpu;
  }
287
288void InstanceStorage::free(const Range& r) {
289 for(auto& i:rgn) {
290 if(i.begin+i.size==r.begin) {
291 i.size += r.size;
292 return;
293 }
294 if(r.begin+r.size==i.begin) {
295 i.begin -= r.size;
296 i.size += r.size;
297 return;
298 }
299 }
300 auto at = std::lower_bound(rgn.begin(),rgn.end(),r,[](const Range& l, const Range& r){
301 return l.begin<r.begin;
302 });
303 rgn.insert(at,r);
304 }
305
306void InstanceStorage::uploadMain() {
307 Workers::setThreadName("InstanceStorage upload");
308 while(true) {
309 std::unique_lock<std::mutex> lck(sync);
310 uploadCnd.wait(lck);
311 if(uploadFId==Resources::MaxFramesInFlight)
312 break;
313 if(uploadFId<0)
314 continue;
315
316 patchGpu[uploadFId].update(patchCpu);
317 uploadFId = -1;
318 }
319 }
Id & operator=(Id &&other) noexcept
void set(const Tempest::Matrix4x4 *anim)
Id alloc(const size_t size)
bool commit(Tempest::Encoder< Tempest::CommandBuffer > &cmd, uint8_t fId)
auto ssbo() const -> const Tempest::StorageBuffer &
bool realloc(Id &id, const size_t size)
@ MaxFramesInFlight
Definition resources.h:48
static Tempest::Device & device()
Definition resources.h:83
static void recycle(Tempest::DescriptorArray &&arr)
static Shaders & inst()
Definition shaders.cpp:39
static void setThreadName(const char *threadName)
Definition workers.cpp:66
static uint32_t nextPot(uint32_t x)
static void bitSet(std::vector< uint32_t > &b, size_t id)
static uint32_t nextPot(uint32_t v)
static bool bitAt(std::vector< uint32_t > &b, size_t id)
static uint32_t alignAs(uint32_t sz, uint32_t alignment)