Commit 17ce6f00 authored by Alexander Gerwing's avatar Alexander Gerwing

Merge branch '70-figure-out-why-rv-times-out-with-accumulation-of-results-on-host' into 'master'

Resolve "Figure out why RV times out with accumulation of results on host"

Closes #83 and #70

See merge request !100
parents 5c2f1c3b edfbce06
Pipeline #37629 passed with stage
in 7 minutes and 39 seconds
......@@ -19,17 +19,21 @@ namespace PPS
template<typename, int>
friend class GpuGrid;
public:
GpuElement(size_t vertex_count, size_t vertex_offset,
GpuElement(size_t index, size_t vertex_count, size_t vertex_offset,
Dune::FieldVector<ct, dim>*[[pacxx::device_memory]] dbp_vertices)
: vertex_count_(vertex_count), vertex_offset_(vertex_offset), dbp_vertices_(dbp_vertices)
: element_index_(index), vertex_count_(vertex_count),
vertex_offset_(vertex_offset), dbp_vertices_(dbp_vertices)
{ }
// Returns the number of corners
size_t corners() { return vertex_count_; }
// Returns the corner
Dune::FieldVector<ct, dim>& corner(int i) { return dbp_vertices_[vertex_offset_ + i]; }
// Returns the index of the element
size_t index() { return element_index_; }
private:
size_t vertex_count_;
size_t vertex_offset_;
size_t element_index_;
Dune::FieldVector<ct, dim>*[[pacxx::device_memory]] dbp_vertices_;
};
......@@ -91,6 +95,7 @@ namespace PPS
std::vector<Dune::FieldVector<ct, dim>> temp_vertices;
std::vector<size_t> temp_indices;
std::vector<size_t> temp_elements;
std::vector<size_t> temp_element_indices;
const auto &iset = gv.indexSet();
// Iterate over the elements
......@@ -99,6 +104,7 @@ namespace PPS
auto geo = element.geometry();
temp_elements.push_back(geo.corners());
temp_elements.push_back(temp_vertices.size());
temp_element_indices.push_back(iset.index(element));
for (size_t i = 0; i < geo.corners(); ++i)
{
temp_vertices.push_back(geo.corner(i));
......@@ -119,7 +125,10 @@ namespace PPS
// Create and upload gpu elements
std::vector<GpuElement<ct, dim>> temp_gpu_elements;
for (size_t i = 0; i < num_elements_; ++i)
temp_gpu_elements.push_back(GpuElement<ct, dim>(temp_elements[i * 2], temp_elements[i * 2 + 1], db_vertices_->get()));
{
temp_gpu_elements.push_back(GpuElement<ct, dim>(
temp_element_indices[i], temp_elements[i * 2], temp_elements[i * 2 + 1], db_vertices_->get()));
}
db_elements_ = &exec.allocate<GpuElement<ct, dim>>(temp_gpu_elements.size());
db_elements_->upload(temp_gpu_elements.data(), temp_gpu_elements.size());
......
......@@ -105,8 +105,10 @@ namespace PPS {
gpu_context_->w_array.upload(w_tmp.data(), w_tmp.size());
// Reset yl
/*
std::vector<typename Range::field_type> yl_tmp(gpu_context_->grid.numElements() * size);
gpu_context_->yl_array.upload(yl_tmp.data(), yl_tmp.size());
*/
// Get pointer
auto dp_z = gpu_context_->z_array.getInstance();
......@@ -134,7 +136,7 @@ namespace PPS {
// make sure local containers are initialized where needed
// gather coefficients from global into local z and w
for (size_t i = 0; i < size; ++i)
for (size_t i = 0; i < LB::size(); ++i)
{
size_t gi = dp_grid->subIndex(element, i);
zl[i] = dp_z->get(gi);
......@@ -156,13 +158,13 @@ namespace PPS {
// use subscription in place of dereference to avoid pacxx warning
dp_lop->jacobian_apply_volume(multi_linear_geometry, dp_rule[0], dp_lb[0], zl, wl, yl);
for (size_t i = 0; i < size; ++i)
dp_yl->data()[i * dp_grid->elements() + global_id] += yl[i];
for (size_t i = 0; i < LB::size(); ++i)
dp_yl->data()[i * dp_grid->elements() + element.index()] = yl[i];
}
};
// Execute gpu kernel
size_t num_workitems = 256;
size_t num_workitems = 32;
size_t num_workgroups = (gpu_context_->grid.numElements() / num_workitems) +
(gpu_context_->grid.numElements() % num_workitems != 0 ? 1 : 0);
auto &executor = pacxx::v2::Executor::get(0);
......@@ -171,10 +173,11 @@ namespace PPS {
// Scatter local vector to global vector
// write entries without considering constraints.
// Dirichlet-constrained rows will be fixed in a postprocessing step.
std::vector<typename Range::field_type> yl_tmp(gpu_context_->grid.numElements() * LB::size());
gpu_context_->yl_array.download(yl_tmp.data(), yl_tmp.size());
const auto& iset = gv_.indexSet();
for (const auto& element : elements(gv_))
for (size_t i = 0; i < size; ++i)
for (size_t i = 0; i < LB::size(); ++i)
y[iset.subIndex(element, i, element.dimension)] +=
trafo(yl_tmp[i * gpu_context_->grid.numElements() + iset.index(element)]);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment