61 auto data =
reinterpret_cast<Matrix4x4*
>(owner->dataCpu.data() + rgn.begin);
62 std::memcpy(data, mat, rgn.asize);
64 for(
size_t i=0; i<rgn.asize; i+=blockSz)
65 bitSet(owner->durty, (rgn.begin+i)/blockSz);
83 auto src =
reinterpret_cast<const uint8_t*
>(data);
84 auto dst = (owner->dataCpu.data() + rgn.begin + offset);
86 if(std::memcmp(src, dst, size)==0)
89 if((offset%blockSz)==0) {
90 for(
size_t i=0; i<size; i+=blockSz) {
91 const size_t sz = std::min(blockSz, size-i);
92 std::memcpy(dst+i, src+i, sz);
93 bitSet(owner->durty, (rgn.begin + offset + i)/blockSz);
96 for(
size_t i=0; i<size; ++i) {
98 bitSet(owner->durty, (rgn.begin + offset + i)/blockSz);
127 std::atomic_thread_fence(std::memory_order_acquire);
130 const size_t dataSize = (dataCpu.size() + 0xFFF) & ~
size_t(0xFFF);
131 if(dataGpu.byteSize()!=dataSize) {
133 dataGpu = device.ssbo(BufferHeap::Device,Tempest::Uninitialized,dataSize);
134 dataGpu.update(dataCpu);
135 std::memset(durty.data(), 0, durty.size()*
sizeof(uint32_t));
140 size_t payloadSize = 0;
141 for(
size_t i = 0; i<blockCnt; ++i) {
142 if(i%32==0 && durty[i/32]==0) {
155 uint32_t size = uint32_t((i-begin)*blockSz);
156 uint32_t chunkSz = 256;
159 p.dst = uint32_t(begin*blockSz);
160 p.src = uint32_t(payloadSize);
162 p.size = std::min<uint32_t>(size, chunkSz);
164 patchBlock.push_back(p);
166 payloadSize += p.size;
171 std::memset(durty.data(), 0, durty.size()*
sizeof(durty[0]));
173 if(patchBlock.size()==0)
176 const size_t headerSize = patchBlock.size()*
sizeof(Path);
177 patchCpu.resize(headerSize + payloadSize);
178 for(
auto& i:patchBlock) {
179 i.src += uint32_t(headerSize);
180 std::memcpy(patchCpu.data()+i.src, dataCpu.data() + i.dst, i.size);
187 std::memcpy(patchCpu.data(), patchBlock.data(), headerSize);
189 auto& path = patchGpu[fId];
190 if(path.byteSize() < headerSize + payloadSize) {
191 path = device.ssbo(BufferHeap::Upload, Uninitialized, headerSize + payloadSize);
195 std::unique_lock<std::mutex> lck(sync);
198 uploadCnd.notify_one();
201 cmd.setFramebuffer({});
202 cmd.setBinding(0, dataGpu);
203 cmd.setBinding(1, path);
205 cmd.dispatch(patchBlock.size());
219 return Id(*
this,Range());
222 for(
size_t i=0; i<rgn.size(); ++i) {
223 if(rgn[i].size==nsize) {
226 rgn.erase(rgn.begin()+intptr_t(i));
227 return Id(*
this,ret);
230 size_t retId = size_t(-1);
231 for(
size_t i=0; i<rgn.size(); ++i) {
232 if(rgn[i].size>nsize && (retId==
size_t(-1) || rgn[i].size<rgn[retId].size)) {
236 if(retId!=
size_t(-1)) {
237 Range ret = rgn[retId];
240 rgn[retId].begin += nsize;
241 rgn[retId].size -= nsize;
242 return Id(*
this,ret);
245 r.begin = dataCpu.size();
249 dataCpu.resize(dataCpu.size() + nsize);
251 blockCnt = (dataCpu.size()+blockSz-1)/blockSz;
252 durty.resize((blockCnt+32-1)/32, 0);
300 auto at = std::lower_bound(rgn.begin(),rgn.end(),r,[](
const Range& l,
const Range& r){