55 #include <thrust/copy.h>
56 #include <thrust/device_vector.h>
57 #include <thrust/host_vector.h>
58 #include <thrust/transform.h>
59 #include <thrust/iterator/zip_iterator.h>
60 #include <thrust/iterator/counting_iterator.h>
61 #include <thrust/sequence.h>
62 #include <thrust/for_each.h>
63 #include <thrust/tuple.h>
64 #include <thrust/extrema.h>
65 #include <thrust/count.h>
66 #include <thrust/fill.h>
67 #include <thrust/sort.h>
68 #include <thrust/iterator/counting_iterator.h>
70 #if !(THRUST_DEVICE_SYSTEM==THRUST_DEVICE_BACKEND_OMP || THRUST_DEVICE_SYSTEM==THRUST_DEVICE_BACKEND_TBB)
71 #include <thrust/system/cuda/execution_policy.h>
74 #include <thrust/system/omp/execution_policy.h>
76 #define TIMER CLOCK_REALTIME
78 #define CUDA_CHECK_RETURN(value){ \
79 cudaError_t _m_cudaStat = value; \
80 if (_m_cudaStat != cudaSuccess) { \
81 fprintf(stderr, "Error %s at line %d in file %s\n", \
82 cudaGetErrorString(_m_cudaStat), __LINE__, __FILE__); \
94 if ((end.tv_nsec - start.tv_nsec) < 0) {
95 temp.tv_sec = end.tv_sec - start.tv_sec - 1;
96 temp.tv_nsec = 1000000000 + end.tv_nsec - start.tv_nsec;
98 temp.tv_sec = end.tv_sec - start.tv_sec;
99 temp.tv_nsec = end.tv_nsec - start.tv_nsec;
120 fNDaughters(ndaughters), fNEvents(nevents), fMaxWeight(0) {
122 for (
GInt_t d = 0; d < fNDaughters; d++) {
123 fDaughters[d].resize(fNEvents);
126 fWeights.resize(fNEvents);
127 fAccRejFlags.resize(fNEvents);
175 fNDaughters(_Masses.size()), fNEvents(_NEvents),
178 RND_Time(0.0), EVT_Time(0.0), EXP_Time(0.0), fNAccepted(0)
181 if (_Masses.size() < 2 || _Masses.size() > 9) {
183 <<
"The number of daughter particles can not be (< 2 || > 9) or masses and names need to have the same size."
189 fMasses.resize(_Masses.size());
190 thrust::copy(_Masses.begin(), _Masses.end(), fMasses.begin());
194 fTeCmTm = _MotherMass;
196 for (
size_t n = 0; n < fNDaughters; n++) {
197 fTeCmTm -= _Masses[n];
200 cout <<
"Not enough energy for this decay. Exit." << endl;
213 fMasses.shrink_to_fit();
222 for (
GInt_t i = 0; i < fNDaughters; i++) {
223 fDaughters[i].clear();
224 fDaughters[i].shrink_to_fit();
229 fWeights.shrink_to_fit();
232 fAccRejFlags.clear();
233 fAccRejFlags.shrink_to_fit();
237 void Generate(
const Vector4R fMother);
244 return fDaughters[i];
311 void Export(
Events *_Events);
312 void ExportUnweighted(
Events *_Events);
326 for (
GInt_t i = 0; i < fNDaughters; i++)
327 fDaughters[i].resize(_nevents);
328 fWeights.resize(_nevents);
329 fAccRejFlags.resize(_nevents);
339 GReal_t x = (a - b - c) * (a + b + c) * (a - b + c) * (a + b - c);
340 x = sqrt(x) / (2 * a);
372 thrust::fill(fAccRejFlags.begin(), fAccRejFlags.end(),
kTrue);
379 thrust::counting_iterator<GLong_t> first(0);
380 thrust::counting_iterator<GLong_t> last = first + fNEvents;
383 thrust::transform(first, last, fWeights.begin(),
386 count = thrust::count(fAccRejFlags.begin(), fAccRejFlags.end(),
397 void PhaseSpace::ExportUnweighted(
Events *_Events) {
402 if(!fNAccepted) Unweight();
407 #if THRUST_DEVICE_SYSTEM==THRUST_DEVICE_BACKEND_OMP || THRUST_DEVICE_SYSTEM==THRUST_DEVICE_BACKEND_TBB
409 #pragma omp parallel num_threads( fNDaughters + 1 )
412 if (omp_get_thread_num() < fNDaughters )
414 thrust::copy_if( fDaughters[omp_get_thread_num()].begin(),fDaughters[omp_get_thread_num()].end(),
415 fAccRejFlags.begin(),
419 if (omp_get_thread_num() == fNDaughters )
421 thrust::copy_if(fWeights.begin(), fWeights.end(),
422 fAccRejFlags.begin(),
425 thrust::copy_if(fAccRejFlags.begin(), fAccRejFlags.end(),
426 fAccRejFlags.begin(),
435 cudaStream_t s[fNDaughters + 1];
437 for (
GInt_t d = 0; d <= fNDaughters; d++) {
440 cudaStreamCreateWithFlags(&s[d], cudaStreamNonBlocking));
445 thrust::copy_if( thrust::cuda::par.on(s[fNDaughters]),
446 fWeights.begin(), fWeights.end(),
447 fAccRejFlags.begin(),
451 thrust::copy_if( thrust::cuda::par.on(s[fNDaughters]),
452 fAccRejFlags.begin(), fAccRejFlags.end(),
453 fAccRejFlags.begin(),
457 for (
GInt_t d = 0; d < fNDaughters; d++) {
459 thrust::copy_if( thrust::cuda::par.on(s[d]),
460 fDaughters[d].begin(),fDaughters[d].end(),
461 fAccRejFlags.begin(),
466 cudaDeviceSynchronize();
467 for (
GInt_t d = 0; d <= fNDaughters; d++)
468 cudaStreamDestroy(s[d]);
474 void PhaseSpace::Export(
Events *_Events) {
480 #if THRUST_DEVICE_SYSTEM==THRUST_DEVICE_BACKEND_OMP || THRUST_DEVICE_SYSTEM==THRUST_DEVICE_BACKEND_TBB
482 #pragma omp parallel num_threads( fNDaughters + 1 )
485 if (omp_get_thread_num() < fNDaughters )
487 thrust::copy(fDaughters[omp_get_thread_num()].begin(),
488 fDaughters[omp_get_thread_num()].end(),
489 _Events->
fDaughters[omp_get_thread_num()].begin());
492 if (omp_get_thread_num() == fNDaughters )
494 thrust::copy(fWeights.begin(),
495 fWeights.end(), _Events->
fWeights.begin());
497 thrust::copy(fAccRejFlags.begin(),
506 cudaStream_t s[fNDaughters + 1];
508 for (
GInt_t d = 0; d <= fNDaughters; d++) {
511 cudaStreamCreateWithFlags(&s[d], cudaStreamNonBlocking));
514 cudaMemcpyAsync(thrust::raw_pointer_cast(_Events->
fWeights.data()),
515 thrust::raw_pointer_cast(fWeights.data()),
516 fWeights.size() *
sizeof(
GReal_t), cudaMemcpyDeviceToHost,
519 cudaMemcpyAsync(thrust::raw_pointer_cast(_Events->
fAccRejFlags.data()),
520 thrust::raw_pointer_cast(fAccRejFlags.data()),
521 fAccRejFlags.size() *
sizeof(
GBool_t), cudaMemcpyDeviceToHost,
524 for (
GInt_t d = 0; d < fNDaughters; d++) {
526 cudaMemcpyAsync(thrust::raw_pointer_cast(_Events->
fDaughters[d].data()),
527 thrust::raw_pointer_cast(fDaughters[d].data()),
528 fDaughters[d].size() *
sizeof(
Vector4R), cudaMemcpyDeviceToHost,
533 cudaDeviceSynchronize();
534 for (
GInt_t d = 0; d <= fNDaughters; d++)
535 cudaStreamDestroy(s[d]);
541 void PhaseSpace::Generate(
const Vector4R fMother) {
547 #if !(THRUST_DEVICE_SYSTEM==THRUST_DEVICE_BACKEND_OMP || THRUST_DEVICE_SYSTEM==THRUST_DEVICE_BACKEND_TBB)
548 cudaDeviceSetCacheConfig(cudaFuncCachePreferL1);
554 thrust::counting_iterator<GLong_t> first(0);
555 thrust::counting_iterator<GLong_t> last = first + fNEvents;
560 timespec time_event_start, time_event_end;
561 clock_gettime(
TIMER, &time_event_start);
563 switch (fNDaughters) {
567 thrust::transform(first, last,
568 thrust::make_zip_iterator(
569 thrust::make_tuple(fDaughters[0].begin(),
570 fDaughters[1].begin())), fWeights.begin(),
571 DecayMother(fMother, fMasses, fNDaughters, fSeed));
577 thrust::transform(first, last,
578 thrust::make_zip_iterator(
579 thrust::make_tuple(fDaughters[0].begin(),
580 fDaughters[1].begin(), fDaughters[2].begin())),
582 DecayMother(fMother, fMasses, fNDaughters, fSeed));
587 thrust::transform(first, last,
588 thrust::make_zip_iterator(
589 thrust::make_tuple(fDaughters[0].begin(),
590 fDaughters[1].begin(), fDaughters[2].begin(),
591 fDaughters[3].begin())), fWeights.begin(),
592 DecayMother(fMother, fMasses, fNDaughters, fSeed));
597 thrust::transform(first, last,
598 thrust::make_zip_iterator(
599 thrust::make_tuple(fDaughters[0].begin(),
600 fDaughters[1].begin(), fDaughters[2].begin(),
601 fDaughters[3].begin(), fDaughters[4].begin())),
603 DecayMother(fMother, fMasses, fNDaughters, fSeed));
609 thrust::transform(first, last,
610 thrust::make_zip_iterator(
611 thrust::make_tuple(fDaughters[0].begin(),
612 fDaughters[1].begin(), fDaughters[2].begin(),
613 fDaughters[3].begin(), fDaughters[4].begin(),
614 fDaughters[5].begin())), fWeights.begin(),
615 DecayMother(fMother, fMasses, fNDaughters, fSeed));
620 thrust::transform(first, last,
621 thrust::make_zip_iterator(
622 thrust::make_tuple(fDaughters[0].begin(),
623 fDaughters[1].begin(), fDaughters[2].begin(),
624 fDaughters[3].begin(), fDaughters[4].begin(),
625 fDaughters[5].begin(), fDaughters[6].begin())),
627 DecayMother(fMother, fMasses, fNDaughters, fSeed));
632 thrust::transform(first, last,
633 thrust::make_zip_iterator(
634 thrust::make_tuple(fDaughters[0].begin(),
635 fDaughters[1].begin(), fDaughters[2].begin(),
636 fDaughters[3].begin(), fDaughters[4].begin(),
637 fDaughters[5].begin(), fDaughters[6].begin(),
638 fDaughters[7].begin())), fWeights.begin(),
639 DecayMother(fMother, fMasses, fNDaughters, fSeed));
644 thrust::transform(first, last,
645 thrust::make_zip_iterator(
646 thrust::make_tuple(fDaughters[0].begin(),
647 fDaughters[1].begin(), fDaughters[2].begin(),
648 fDaughters[3].begin(), fDaughters[4].begin(),
649 fDaughters[5].begin(), fDaughters[6].begin(),
650 fDaughters[7].begin(), fDaughters[8].begin())),
652 DecayMother(fMother, fMasses, fNDaughters, fSeed));
658 clock_gettime(
TIMER, &time_event_end);
660 +
time_diff(time_event_start, time_event_end).tv_nsec * 1.0e-9));
663 RealVector_d::iterator w = thrust::max_element(fWeights.begin(),
674 #if !(THRUST_DEVICE_SYSTEM==THRUST_DEVICE_BACKEND_OMP || THRUST_DEVICE_SYSTEM==THRUST_DEVICE_BACKEND_TBB)
675 cudaDeviceSetCacheConfig(cudaFuncCachePreferL1);
678 if (fNEvents < fMothers.size())
679 cout <<
"fNEvents != fMothers.size()" << endl;
694 timespec time_event_start, time_event_end;
695 clock_gettime(
TIMER, &time_event_start);
699 thrust::counting_iterator<GLong_t> first(0);
700 thrust::counting_iterator<GLong_t> last = first + fNEvents;
703 switch (fNDaughters) {
707 thrust::transform(first, last,
708 thrust::make_zip_iterator(
709 thrust::make_tuple(fMothers.begin(),
710 fDaughters[0].begin(), fDaughters[1].begin())),
718 thrust::transform(first, last,
719 thrust::make_zip_iterator(
720 thrust::make_tuple(fMothers.begin(),
721 fDaughters[0].begin(), fDaughters[1].begin(),
722 fDaughters[2].begin())), fWeights.begin(),
728 thrust::transform(first, last,
729 thrust::make_zip_iterator(
730 thrust::make_tuple(fMothers.begin(),
731 fDaughters[0].begin(), fDaughters[1].begin(),
732 fDaughters[2].begin(), fDaughters[3].begin())),
739 thrust::transform(first, last,
740 thrust::make_zip_iterator(
741 thrust::make_tuple(fMothers.begin(),
742 fDaughters[0].begin(), fDaughters[1].begin(),
743 fDaughters[2].begin(), fDaughters[3].begin(),
744 fDaughters[4].begin())), fWeights.begin(),
750 thrust::transform(first, last,
751 thrust::make_zip_iterator(
752 thrust::make_tuple(fMothers.begin(),
753 fDaughters[0].begin(), fDaughters[1].begin(),
754 fDaughters[2].begin(), fDaughters[3].begin(),
755 fDaughters[4].begin(), fDaughters[5].begin())),
762 thrust::transform(first, last,
763 thrust::make_zip_iterator(
764 thrust::make_tuple(fMothers.begin(),
765 fDaughters[0].begin(), fDaughters[1].begin(),
766 fDaughters[2].begin(), fDaughters[3].begin(),
767 fDaughters[4].begin(), fDaughters[5].begin(),
768 fDaughters[6].begin())), fWeights.begin(),
774 thrust::transform(first, last,
775 thrust::make_zip_iterator(
776 thrust::make_tuple(fMothers.begin(),
777 fDaughters[0].begin(), fDaughters[1].begin(),
778 fDaughters[2].begin(), fDaughters[3].begin(),
779 fDaughters[4].begin(), fDaughters[5].begin(),
780 fDaughters[6].begin(), fDaughters[7].begin())),
787 thrust::transform(first, last,
788 thrust::make_zip_iterator(
789 thrust::make_tuple(fMothers.begin(),
790 fDaughters[0].begin(), fDaughters[1].begin(),
791 fDaughters[2].begin(), fDaughters[3].begin(),
792 fDaughters[4].begin(), fDaughters[5].begin(),
793 fDaughters[6].begin(), fDaughters[7].begin(),
794 fDaughters[8].begin())), fWeights.begin(),
801 clock_gettime(
TIMER, &time_event_end);
803 +
time_diff(time_event_start, time_event_end).tv_nsec * 1.0e-9));
806 RealVector_d::iterator w = thrust::max_element(fWeights.begin(),
GReal_t fMaxWeight
Maximum weight of the generated events.
long GLong_t
Signed long integer 4 bytes (long)
Events is a container struct to hold all the information corresponding the generated events...
void FreeResources()
Free resources.
BoolVector_h fAccRejFlags
Vector of flags. Accepted events are flagged 1 and rejected 0.
bool GBool_t
Boolean (0=false, 1=true) (bool)
void SetSeed(GInt_t _seed)
mc_host_vector< GReal_t > RealVector_h
Typedef for a GBool_t host vector.
mc_device_vector< Vector4R > Particles_d
Typedef for a GComplex_t device vector.
Typedef for useful container classes used in MCBooster.
GLong_t GetNAccepted() const
GReal_t GetEvtTime() const
Returns the time spent in seconds to process the random numbers and set the phase space four-vectors...
PhaseSpace(GReal_t _MotherMass, vector< GReal_t > _Masses, GLong_t _NEvents)
PhaseSpace ctor.
GReal_t PDK(const GReal_t a, const GReal_t b, const GReal_t c) const
PDK function.
const RealVector_d & GetWeights() const
Returns a device vector with the event weights.
Implementation of the event generator.
~PhaseSpace()
PhaseSpace() dtor.
Flags generated events as accepted (1) or rejected (0).
Events(GInt_t ndaughters, GLong_t nevents)
Constructor takes as parameters the number of particles and number of events.
mc_device_vector< GBool_t > BoolVector_d
Typedef for a STL vector of pointers to host RealVector_h vectors.
mc_host_vector< Vector4R > Particles_h
Typedef for a GComplex_t host vector.
GLong_t GetNEvents() const
Returns the number of events.
GReal_t GetMaxWeight() const
Returns the max event weight that can be found in the generated sample.
double GReal_t
Double 8 bytes or float 4 bytes.
timespec time_diff(timespec start, timespec end)
Function to calculate time intervals in seconds.
void Allocate(const GLong_t _nevents)
Allocate resources on the device for event generation.
GLong_t fNEvents
Number of events.
GReal_t GetRndTime() const
Returns the time spent in seconds to generate the random numbers necessary for the calculation...
mc_host_vector< GBool_t > BoolVector_h
Vector3R host vector.
Implements FlagAcceptReject.
GReal_t GetExpTime() const
Returns the time spent in seconds to export the generated events to a Events container.
GInt_t GetNDaughters() const
Returns the number of daughter particles.
int GInt_t
Signed integer 4 bytes (int)
#define CUDA_CHECK_RETURN(value)
RealVector_h fWeights
Vector of event weights.
Particles_h fDaughters[kMAXP]
Array of daughter particle vectors.
Particles_d & GetDaughters(GInt_t i)
Get the daughter with index 'i' in the mass array.
mc_device_vector< GReal_t > RealVector_d
Typedef for a GBool_t device vector.
GInt_t fNDaughters
Number of daughters.