-
Notifications
You must be signed in to change notification settings - Fork 1.5k
Extend templation in GenVector to better support vectorised Vc types. #394
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
76a8f9d
7e9c2b7
b15d62a
2b2fc17
2e74f0d
c3175c6
9bdfb8e
c97245c
f12d368
bbbbbd0
9a0ee77
371d50d
dcb0b30
29e7e30
14e73b4
27bbb57
196a00b
0ad5828
1928c75
5ccdaea
ef1aea9
6cac16a
68ba28a
939b7cb
d9b810b
776ce1a
fbe74d2
12b43b0
8367bdf
0f404ba
65a701d
4ba8a1b
ff94a5a
b8ca822
b32cb6a
febe783
a628a2a
89b2d26
6bf0368
bd96bd0
dea8d05
a18221a
eb0b8ed
7da1945
a271eab
decbe11
58c6315
17f42a3
434396d
b145899
ad8d3a5
b018a03
bb1e386
1786383
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
- Loading branch information
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,305 @@ | ||
|
|
||
| // STL | ||
| #include <random> | ||
| #include <vector> | ||
| #include <iostream> | ||
| #include <string> | ||
| #include <typeinfo> | ||
|
|
||
| // Vc | ||
| #include <Vc/Vc> | ||
|
|
||
| // ROOT | ||
| #include "Math/GenVector/PositionVector3D.h" | ||
| #include "Math/GenVector/DisplacementVector3D.h" | ||
| #include "Math/GenVector/Plane3D.h" | ||
| #include "Math/GenVector/Transform3D.h" | ||
|
|
||
| int compare(double v1, double v2, const std::string &name = "", double scale = 100.0) | ||
| { | ||
| // ntest = ntest + 1; | ||
|
|
||
| // numerical double limit for epsilon | ||
| const double eps = scale * std::numeric_limits<double>::epsilon(); | ||
| int iret = 0; | ||
| double delta = v2 - v1; | ||
| double d = 0; | ||
| if (delta < 0) delta = -delta; | ||
| if (v1 == 0 || v2 == 0) { | ||
| if (delta > eps) { | ||
| iret = 1; | ||
| } | ||
| } | ||
| // skip case v1 or v2 is infinity | ||
| else { | ||
| d = v1; | ||
|
|
||
| if (v1 < 0) d = -d; | ||
| // add also case when delta is small by default | ||
| if (delta / d > eps && delta > eps) iret = 1; | ||
| } | ||
|
|
||
| if (iret != 0) { | ||
| int pr = std::cout.precision(18); | ||
| std::cout << "\nDiscrepancy in " << name << "() : " << v1 << " != " << v2 << " discr = " << int(delta / d / eps) | ||
| << " (Allowed discrepancy is " << eps << ")\n"; | ||
| std::cout.precision(pr); | ||
| } | ||
| return iret; | ||
| } | ||
|
|
||
| // randomn generator | ||
| static std::default_random_engine gen; | ||
| // Distributions for each member | ||
| static std::uniform_real_distribution<double> p_x(-800, 800), p_y(-600, 600), p_z(10000, 10500); | ||
| static std::uniform_real_distribution<double> d_x(-0.2, 0.2), d_y(-0.1, 0.1), d_z(0.95, 0.99); | ||
| static std::uniform_real_distribution<double> c_x(3100, 3200), c_y(10, 15), c_z(3200, 3300); | ||
| static std::uniform_real_distribution<double> r_rad(8500, 8600); | ||
| static std::uniform_real_distribution<double> p0(-0.002, 0.002), p1(-0.2, 0.2), p2(0.97, 0.99), p3(-1300, 1300); | ||
|
|
||
| template <typename POINT, typename VECTOR, typename PLANE, typename FTYPE> | ||
| class Data { | ||
| public: | ||
| typedef std::vector<Data, Vc::Allocator<Data>> Vector; | ||
|
|
||
| public: | ||
| POINT position; | ||
| VECTOR direction; | ||
| POINT CoC; | ||
| PLANE plane; | ||
| FTYPE radius{0}; | ||
|
|
||
| public: | ||
| template <typename INDATA> | ||
| Data(const INDATA &ind) | ||
| : position(ind.position.x(), ind.position.y(), ind.position.z()), | ||
| direction(ind.direction.x(), ind.direction.y(), ind.direction.z()), CoC(ind.CoC.x(), ind.CoC.y(), ind.CoC.z()), | ||
| plane(ind.plane.A(), ind.plane.B(), ind.plane.C(), ind.plane.D()), radius(ind.radius) | ||
| { | ||
| } | ||
| Data() | ||
| : position(p_x(gen), p_y(gen), p_z(gen)), direction(d_x(gen), d_y(gen), d_z(gen)), | ||
| CoC(c_x(gen), c_y(gen), c_z(gen)), plane(p0(gen), p1(gen), p2(gen), p3(gen)), radius(r_rad(gen)) | ||
| { | ||
| } | ||
| }; | ||
|
|
||
| template <typename INDATA, typename OUTDATA> | ||
| void fill(const INDATA &in, OUTDATA &out) | ||
| { | ||
| out.clear(); | ||
| out.reserve(in.size()); | ||
| for (const auto &i : in) { | ||
| out.emplace_back(i); | ||
| } | ||
| } | ||
|
|
||
| template <typename POINT, typename VECTOR, typename FTYPE> | ||
| inline | ||
| typename std::enable_if<std::is_arithmetic<typename POINT::Scalar>::value && | ||
| std::is_arithmetic<typename VECTOR::Scalar>::value && std::is_arithmetic<FTYPE>::value, | ||
| bool>::type | ||
| reflectSpherical(POINT &position, VECTOR &direction, const POINT &CoC, const FTYPE radius) | ||
| { | ||
| constexpr FTYPE zero(0), two(2.0), four(4.0), half(0.5); | ||
| const FTYPE a = direction.Mag2(); | ||
| const VECTOR delta = position - CoC; | ||
| const FTYPE b = two * direction.Dot(delta); | ||
| const FTYPE c = delta.Mag2() - radius * radius; | ||
| const FTYPE discr = b * b - four * a * c; | ||
| const bool OK = discr > zero; | ||
| if (OK) { | ||
| const FTYPE dist = half * (std::sqrt(discr) - b) / a; | ||
| // change position to the intersection point | ||
| position += dist * direction; | ||
| // reflect the vector | ||
| // r = u - 2(u.n)n, r=reflection, u=incident, n=normal | ||
| const VECTOR normal = position - CoC; | ||
| direction -= (two * normal.Dot(direction) / normal.Mag2()) * normal; | ||
| } | ||
| return OK; | ||
| } | ||
|
|
||
| template <typename POINT, typename VECTOR, typename FTYPE> | ||
| inline | ||
| typename std::enable_if<!std::is_arithmetic<typename POINT::Scalar>::value && | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. And here. |
||
| !std::is_arithmetic<typename VECTOR::Scalar>::value && !std::is_arithmetic<FTYPE>::value, | ||
| typename FTYPE::mask_type>::type | ||
| reflectSpherical(POINT &position, VECTOR &direction, const POINT &CoC, const FTYPE radius) | ||
| { | ||
| const FTYPE two(2.0), four(4.0), half(0.5); | ||
| const FTYPE a = direction.Mag2(); | ||
| const VECTOR delta = position - CoC; | ||
| const FTYPE b = two * direction.Dot(delta); | ||
| const FTYPE c = delta.Mag2() - radius * radius; | ||
| FTYPE discr = b * b - four * a * c; | ||
| typename FTYPE::mask_type OK = discr > FTYPE::Zero(); | ||
| if (any_of(OK)) { | ||
| // Zero out the negative values in discr, to prevent sqrt(-ve) | ||
| discr(!OK) = FTYPE::Zero(); | ||
| // compute the distance | ||
| const FTYPE dist = half * (sqrt(discr) - b) / a; | ||
| // change position to the intersection point | ||
| position += dist * direction; | ||
| // reflect the vector | ||
| // r = u - 2(u.n)n, r=reflection, u=incident, n=normal | ||
| const VECTOR normal = position - CoC; | ||
| direction -= (two * normal.Dot(direction) / normal.Mag2()) * normal; | ||
| } | ||
| // return the mask indicating which results should be used | ||
| return OK; | ||
| } | ||
|
|
||
| template <typename POINT, typename VECTOR, typename PLANE> | ||
| inline typename std::enable_if<std::is_arithmetic<typename POINT::Scalar>::value && | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. And here. |
||
| std::is_arithmetic<typename VECTOR::Scalar>::value && | ||
| std::is_arithmetic<typename PLANE::Scalar>::value, | ||
| bool>::type | ||
| reflectPlane(POINT &position, VECTOR &direction, const PLANE &plane) | ||
| { | ||
| constexpr typename POINT::Scalar two(2.0); | ||
| const bool OK = true; | ||
| // Plane normal | ||
| const auto &normal = plane.Normal(); | ||
| // compute distance to the plane | ||
| const auto scalar = direction.Dot(normal); | ||
| const auto distance = -(plane.Distance(position)) / scalar; | ||
| // change position to reflection point and update direction | ||
| position += distance * direction; | ||
| direction -= two * scalar * normal; | ||
| return OK; | ||
| } | ||
|
|
||
| template <typename POINT, typename VECTOR, typename PLANE, typename FTYPE = typename POINT::Scalar> | ||
| inline typename std::enable_if<!std::is_arithmetic<typename POINT::Scalar>::value && | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. And here. |
||
| !std::is_arithmetic<typename VECTOR::Scalar>::value && | ||
| !std::is_arithmetic<typename PLANE::Scalar>::value, | ||
| typename FTYPE::mask_type>::type | ||
| reflectPlane(POINT &position, VECTOR &direction, const PLANE &plane) | ||
| { | ||
| const typename POINT::Scalar two(2.0); | ||
| const typename FTYPE::mask_type OK(true); | ||
| // Plane normal | ||
| const VECTOR normal = plane.Normal(); | ||
| // compute distance to the plane | ||
| const FTYPE scalar = direction.Dot(normal); | ||
| const FTYPE distance = -(plane.Distance(position)) / scalar; | ||
| // change position to reflection point and update direction | ||
| position += distance * direction; | ||
| direction -= two * scalar * normal; | ||
| return OK; | ||
| } | ||
|
|
||
| template <typename T> | ||
| using Point = ROOT::Math::PositionVector3D<ROOT::Math::Cartesian3D<T>, ROOT::Math::DefaultCoordinateSystemTag>; | ||
| template <typename T> | ||
| using Vector = ROOT::Math::DisplacementVector3D<ROOT::Math::Cartesian3D<T>, ROOT::Math::DefaultCoordinateSystemTag>; | ||
| template <typename T> | ||
| using Plane = ROOT::Math::Impl::Plane3D<T>; | ||
|
|
||
| int main(int /*argc*/, char ** /*argv*/) | ||
| { | ||
| int ret = 0; | ||
|
|
||
| // must be multiple of 8 | ||
| // const unsigned int nPhotons = 96 * 1e2; | ||
| const unsigned int nPhotons = 8; | ||
| // const unsigned int nPhotons = 8; | ||
| std::cout << "Creating " << nPhotons << " random photons ..." << std::endl; | ||
|
|
||
| // Scalar Types | ||
| Data<Point<double>, Vector<double>, Plane<double>, double>::Vector scalar_data(nPhotons); | ||
|
|
||
| // Vc Types | ||
| Data<Point<Vc::double_v>, Vector<Vc::double_v>, Plane<Vc::double_v>, Vc::double_v>::Vector vc_data; | ||
| // Clone the exact random values from the Scalar vector | ||
| // Note we are making the same number of entries in the container, but each entry is a vector entry | ||
| // with Vc::double_t::Size entries. | ||
| fill(scalar_data, vc_data); | ||
|
|
||
| // Loop over the two containers and compare | ||
| std::cout << "Ray Tracing :-" << std::endl; | ||
| for (size_t i = 0; i < nPhotons; ++i) { | ||
| auto &sc = scalar_data[i]; | ||
| auto &vc = vc_data[i]; | ||
|
|
||
| // ray tracing | ||
| reflectSpherical(sc.position, sc.direction, sc.CoC, sc.radius); | ||
| reflectPlane(sc.position, sc.direction, sc.plane); | ||
| reflectSpherical(vc.position, vc.direction, vc.CoC, vc.radius); | ||
| reflectPlane(vc.position, vc.direction, vc.plane); | ||
|
|
||
| std::cout << "Position " << sc.position << " " << vc.position << std::endl; | ||
| std::cout << "Direction " << sc.direction << " " << vc.direction << std::endl; | ||
|
|
||
| for (std::size_t j = 0; j < Vc::double_v::Size; ++j) { | ||
| ret |= compare(sc.position.x(), vc.position.x()[j]); | ||
| ret |= compare(sc.position.y(), vc.position.y()[j]); | ||
| ret |= compare(sc.position.z(), vc.position.z()[j]); | ||
| ret |= compare(sc.direction.x(), vc.direction.x()[j]); | ||
| ret |= compare(sc.direction.y(), vc.direction.y()[j]); | ||
| ret |= compare(sc.direction.z(), vc.direction.z()[j]); | ||
| } | ||
| } | ||
|
|
||
| // Now test Transformation3D | ||
| std::cout << "Transforms :-" << std::endl; | ||
| for (size_t i = 0; i < nPhotons; ++i) { | ||
| auto &sc = scalar_data[i]; | ||
| auto &vc = vc_data[i]; | ||
|
|
||
| // make 6 random scalar Points | ||
| Point<double> sp1(p_x(gen), p_y(gen), p_z(gen)); | ||
| Point<double> sp2(p_x(gen), p_y(gen), p_z(gen)); | ||
| Point<double> sp3(p_x(gen), p_y(gen), p_z(gen)); | ||
| Point<double> sp4(p_x(gen), p_y(gen), p_z(gen)); | ||
| Point<double> sp5(p_x(gen), p_y(gen), p_z(gen)); | ||
| Point<double> sp6(p_x(gen), p_y(gen), p_z(gen)); | ||
| // clone to Vc versions | ||
| Point<Vc::double_v> vp1(sp1.x(), sp1.y(), sp1.z()); | ||
| Point<Vc::double_v> vp2(sp2.x(), sp2.y(), sp2.z()); | ||
| Point<Vc::double_v> vp3(sp3.x(), sp3.y(), sp3.z()); | ||
| Point<Vc::double_v> vp4(sp4.x(), sp4.y(), sp4.z()); | ||
| Point<Vc::double_v> vp5(sp5.x(), sp5.y(), sp5.z()); | ||
| Point<Vc::double_v> vp6(sp6.x(), sp6.y(), sp6.z()); | ||
|
|
||
| // Make transformations from points | ||
| // note warnings about axis not having the same angles expected here... | ||
| // point is to check scalar and vector versions do the same thing | ||
| ROOT::Math::Impl::Transform3D<double> st(sp1, sp2, sp3, sp4, sp5, sp6); | ||
| ROOT::Math::Impl::Transform3D<Vc::double_v> vt(vp1, vp2, vp3, vp4, vp5, vp6); | ||
|
|
||
| // transform the vectors | ||
| const auto sv = st * sc.direction; | ||
| const auto vv = vt * vc.direction; | ||
| std::cout << "Transformed Direction " << sv << " " << vv << std::endl; | ||
|
|
||
| // invert the transformations | ||
| st.Invert(); | ||
| vt.Invert(); | ||
|
|
||
| // Move the points back | ||
| const auto sv_i = st * sv; | ||
| const auto vv_i = vt * vv; | ||
| std::cout << "Transformed Back Direction " << sc.direction << " " << sv_i << " " << vv_i << std::endl; | ||
|
|
||
| for (std::size_t j = 0; j < Vc::double_v::Size; ++j) { | ||
| ret |= compare(sv.x(), vv.x()[j]); | ||
| ret |= compare(sv.y(), vv.y()[j]); | ||
| ret |= compare(sv.z(), vv.z()[j]); | ||
| ret |= compare(sc.direction.x(), vv_i.x()[j]); | ||
| ret |= compare(sc.direction.y(), vv_i.y()[j]); | ||
| ret |= compare(sc.direction.z(), vv_i.z()[j]); | ||
| } | ||
|
|
||
| ret |= compare(sc.direction.x(), sv_i.x()); | ||
| ret |= compare(sc.direction.y(), sv_i.y()); | ||
| ret |= compare(sc.direction.z(), sv_i.z()); | ||
| } | ||
|
|
||
| if (ret) | ||
| std::cerr << "test FAILED !!! " << std::endl; | ||
| else | ||
| std::cout << "test OK " << std::endl; | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I am wondering if we could assert if the performance of the vectorized code is not better from the non-vectorized (of course in some margins)? Out of curiosity: how much did the vectorized code gain here?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The point of that test was not to check CPU performance, but to assert that the same results are obtained in both cases. Note in that test the two containers have exactly the same number of entries, so the time to compute each should be more or less the same. The advantage from Vc comes from the fact each entry in the Vc container is infact a vector type, so has internally I actually gave a talk on this to a small LHCb meeting last week. I'll happily upload the sides here. The bottom line is for the nice idealised case I get effectively the perfect speed up from the SIMD address sizes (so 2,4 or 8 depending on the SIMD instruction set used and double or float).
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thanks for explaining! Nice slides! I'd appreciate it if you could add a speed check. Makes me much more comfortable and makes it clear (from history point of view) what is the value of this PR.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. OK. It will probably make sense to do it as separate test, as to properly measure the time gain needs a different setup to that test. Can you point me at examples of how you measure execution time in the root tests, are there examples where this is done ? |
||
| return ret; | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@cjones051073 it seems that we omitted transforming this code here as a default template parameter.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Indeed, I over looked these instances of enable_if. will change them.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done. I guess I should open a new PR for this small fix, as this one is closed.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done. see PR #427.