Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions doc/distributions/students_t.qbk
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,11 @@
RealType beta,
RealType sd,
RealType hint = 100);

// degrees of freedom inversion from a quantile and probability:
BOOST_MATH_GPU_ENABLED static RealType invert_probability_with_respect_to_degrees_of_freedom(
RealType x,
RealType p);
};

}} // namespaces
Expand Down Expand Up @@ -106,6 +111,13 @@ For more information on this function see the
[@http://www.itl.nist.gov/div898/handbook/prc/section2/prc222.htm
NIST Engineering Statistics Handbook].

BOOST_MATH_GPU_ENABLED static RealType invert_probability_with_respect_to_degrees_of_freedom(
RealType x,
RealType p);

Returns the degrees of freedom [nu] such that CDF(/x/; [nu]) = /p/.
Requires 0 < /p/ < 1 and /x/ [ne] 0, otherwise calls __domain_error.

[h4 Non-member Accessors]

All the [link math_toolkit.dist_ref.nmp usual non-member accessor functions] that are generic to all
Expand Down Expand Up @@ -164,6 +176,7 @@ without the subtraction implied above.]]
[[skewness][if (v > 3) 0 else NaN ]]
[[kurtosis][if (v > 4) 3 * (v - 2) \/ (v - 4) else NaN]]
[[kurtosis excess][if (v > 4) 6 \/ (df - 4) else NaN]]
[[invert_probability_with_respect_to_degrees_of_freedom][Uses a 2nd-order Edgeworth expansion as initial guess for a numerical root finder]]
]

If the moment index /k/ is less than /v/, then the moment is undefined.
Expand Down
151 changes: 143 additions & 8 deletions include/boost/math/distributions/students_t.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#include <boost/math/distributions/fwd.hpp>
#include <boost/math/special_functions/beta.hpp> // for ibeta(a, b, x).
#include <boost/math/special_functions/digamma.hpp>
#include <boost/math/special_functions/relative_difference.hpp>
#include <boost/math/distributions/complement.hpp>
#include <boost/math/distributions/detail/common_error_handling.hpp>
#include <boost/math/distributions/normal.hpp>
Expand Down Expand Up @@ -58,6 +59,10 @@ class students_t_distribution
RealType sd,
RealType hint = 100);

BOOST_MATH_GPU_ENABLED static RealType invert_probability_with_respect_to_degrees_of_freedom(
RealType x,
RealType p);

private:
// Data member:
RealType df_; // degrees of freedom is a real number > 0 or +infinity.
Expand Down Expand Up @@ -281,6 +286,11 @@ BOOST_MATH_GPU_ENABLED inline RealType quantile(const complemented2_type<student
// Parameter estimation follows:
//
namespace detail{

// Minimum degrees-of-freedom used as the warm-start fallback when the
// Edgeworth approximation yields no valid positive root or is inaccurate
constexpr double df_hint_fallback = 0.01;

//
// Functors for finding degrees of freedom:
//
Expand Down Expand Up @@ -308,6 +318,94 @@ struct sample_size_func
RealType alpha, beta, ratio;
};

template <class RealType, class Policy>
struct cdf_to_df_func
{
BOOST_MATH_GPU_ENABLED cdf_to_df_func(RealType x_val, RealType p_val, bool c)
: x(x_val), p(p_val), comp(c) {}

BOOST_MATH_GPU_ENABLED RealType operator()(const RealType& df)
{
students_t_distribution<RealType, Policy> t(df);
return comp ?
RealType(p - cdf(complement(t, x)))
: RealType(cdf(t, x) - p);
}
RealType x, p;
bool comp;
};

//
// Shared root-finding helper used by both find_degrees_of_freedom and
// invert_probability_with_respect_to_degrees_of_freedom.
//
template <class RealType, class Policy, class Func>
BOOST_MATH_GPU_ENABLED RealType solve_for_degrees_of_freedom(
Func f,
RealType hint,
bool rising,
const char* function,
Policy const&)
{
tools::eps_tolerance<RealType> tol(policies::digits<RealType, Policy>());
boost::math::uintmax_t max_iter = policies::get_max_root_iterations<Policy>();
boost::math::pair<RealType, RealType> r = tools::bracket_and_solve_root(
f, hint, RealType(2), rising, tol, max_iter, Policy());
RealType result = r.first + (r.second - r.first) / 2;
if (max_iter >= policies::get_max_root_iterations<Policy>())
{
return policies::raise_evaluation_error<RealType>(function, // LCOV_EXCL_LINE
"Unable to locate solution in a reasonable time: either there is no answer to " // LCOV_EXCL_LINE
"the degrees of freedom or the answer is infinite. Current best guess is %1%", // LCOV_EXCL_LINE
result, Policy()); // LCOV_EXCL_LINE
}
return result;
}

//
// Edgeworth warm-start for invert_probability_with_respect_to_degrees_of_freedom.
// On success writes a df estimate into 'result' and returns true.
// Returns false when no positive root is found; 'result' is left unchanged.
//
template <class RealType, class Policy>
BOOST_MATH_GPU_ENABLED bool approximate_df_with_edgeworth_expansion(RealType x, RealType p, RealType& result)
{
BOOST_MATH_STD_USING
// F(x; nu) ~ cdf_normal(x) - pdf_normal(x)*(x + x^3)/(4*nu) + pdf_normal(x)*(3x + 5x^3 + 7x^5 - 3x^7)/(96*nu^2)
// Substituting u = 1/nu and setting F(x; nu) = p gives b*u^2 - a*u + c = 0, where:
// a = pdf_normal(x) * (x + x^3) / 4
// b = pdf_normal(x) * (3x + 5x^3 + 7x^5 - 3x^7) / 96
// c = cdf_normal(x) - p
normal_distribution<RealType, Policy> std_normal(0, 1);
RealType pdf_val = pdf(std_normal, x);
RealType cdf_val = cdf(std_normal, x);

RealType x2 = x * x;
RealType x3 = x2 * x;
RealType x5 = x3 * x2;
RealType x7 = x5 * x2;

RealType a = pdf_val * (x + x3) / 4;
RealType b = pdf_val * (3 * x + 5 * x3 + 7 * x5 - 3 * x7) / 96;
RealType c = cdf_val - p;

RealType discriminant = a * a - 4 * b * c;
if (discriminant >= 0 && b != 0)
{
RealType sqrt_disc = sqrt(discriminant);
// The two roots of b*u^2 - a*u + c = 0. Pick the smallest positive u (= largest df = 1/u).
RealType u = (a - sqrt_disc) / (2 * b);
if (u <= 0)
u = (a + sqrt_disc) / (2 * b);
if (u > 0)
{
result = 1 / u;
return true;
}
}
return false;
}

} // namespace detail

template <class RealType, class Policy>
Expand All @@ -332,16 +430,53 @@ BOOST_MATH_GPU_ENABLED RealType students_t_distribution<RealType, Policy>::find_
hint = 1;

detail::sample_size_func<RealType, Policy> f(alpha, beta, sd, difference_from_mean);
tools::eps_tolerance<RealType> tol(policies::digits<RealType, Policy>());
boost::math::uintmax_t max_iter = policies::get_max_root_iterations<Policy>();
boost::math::pair<RealType, RealType> r = tools::bracket_and_solve_root(f, hint, RealType(2), false, tol, max_iter, Policy());
RealType result = r.first + (r.second - r.first) / 2;
if(max_iter >= policies::get_max_root_iterations<Policy>())
return detail::solve_for_degrees_of_freedom(f, hint, false, function, Policy());
}

template <class RealType, class Policy>
BOOST_MATH_GPU_ENABLED RealType students_t_distribution<RealType, Policy>::invert_probability_with_respect_to_degrees_of_freedom(
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We already have a function for this here:

BOOST_MATH_GPU_ENABLED RealType students_t_distribution<RealType, Policy>::find_degrees_of_freedom(

However, you have a better initial guess to the root finder which would be a very welcome addition!

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I found this function but was not sure how to map the inversion problem we have here (finding degrees of freedom given a probability and a quantile) to this one arising from sample size calculation.

The signatures are:
find_degrees_of_freedom(RealType difference_from_mean, RealType alpha, RealType beta, RealType sd, RealType hint)
invert_probability_with_respect_to_degrees_of_freedom(RealType x, RealType p)

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah OK, slightly different problems, I would still overload the existing function though for consistency and call it:

find_degrees_of_freedom(RealType t_stat, RealType p)

RealType x,
RealType p)
{
BOOST_MATH_STD_USING // for ADL of std functions
constexpr auto function = "boost::math::students_t_distribution<%1%>::invert_probability_with_respect_to_degrees_of_freedom";
//
// Check for domain errors:
//
RealType error_result;
if (false == detail::check_probability(function, p, &error_result, Policy()))
return error_result;

if (x == 0)
{
return policies::raise_evaluation_error<RealType>(function, "Unable to locate solution in a reasonable time: either there is no answer to how many degrees of freedom are required" // LCOV_EXCL_LINE
" or the answer is infinite. Current best guess is %1%", result, Policy()); // LCOV_EXCL_LINE
// CDF(0; df) = 0.5 for all df; only solvable when p == 0.5.
if (p == static_cast<RealType>(0.5))
return policies::raise_overflow_error<RealType>(function, 0, Policy());
return policies::raise_domain_error<RealType>(
function,
"No degrees of freedom can satisfy CDF(0; df) == %1% (must be 0.5).",
p, Policy());
}
return result;

// Edgeworth warm start: compute a df estimate; fall back to df_hint_fallback if it fails
// or is too inaccurate
RealType hint = static_cast<RealType>(detail::df_hint_fallback);
if (detail::approximate_df_with_edgeworth_expansion<RealType, Policy>(x, p, hint))
{
// Check that approximation is at least somewhat close;
// for small degrees of freedom it does not fail but is very inaccurate.
RealType relative_error_threshold = static_cast<RealType>(0.1);
students_t_distribution<RealType, Policy> t_approx(hint);
RealType p_approx = cdf(t_approx, x);
if (relative_difference(p_approx, p) > relative_error_threshold)
hint = static_cast<RealType>(detail::df_hint_fallback);
}
// Root-find on f(df) = CDF(x; df) - p.
// CDF(x; df) is strictly increasing in df for x > 0, decreasing for x < 0.
// Use the smaller of p and q=1-p to avoid cancellation near 1.
RealType q = 1 - p;
detail::cdf_to_df_func<RealType, Policy> f(x, p < q ? p : q, p < q ? false : true);
return detail::solve_for_degrees_of_freedom(f, hint, x > 0, function, Policy());
}

template <class RealType, class Policy>
Expand Down
125 changes: 125 additions & 0 deletions test/test_students_t.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -548,6 +548,131 @@ void test_spots(RealType)
static_cast<RealType>(1.0))),
9);

// Tests for invert_probability_with_respect_to_degrees_of_freedom.
// Each case is derived from the CDF spot tests above: the exact df is known,
// so we verify that inverting CDF(x; df) = p recovers df to tight tolerance.
// float has ~7 significant digits; large-df inversion is ill-conditioned at that precision,
// so use a looser tolerance for single precision.
RealType tol_inv_df = std::is_same<RealType, float>::value
? static_cast<RealType>(0.1) // float: 0.1%
: static_cast<RealType>(0.01); // double+: 0.01%
BOOST_CHECK_CLOSE(
students_t_distribution<RealType>::invert_probability_with_respect_to_degrees_of_freedom(
static_cast<RealType>(-6.96455673428326),
static_cast<RealType>(0.01)),
static_cast<RealType>(2),
tol_inv_df);
BOOST_CHECK_CLOSE(
students_t_distribution<RealType>::invert_probability_with_respect_to_degrees_of_freedom(
static_cast<RealType>(-3.36492999890721),
static_cast<RealType>(0.01)),
static_cast<RealType>(5),
tol_inv_df);
BOOST_CHECK_CLOSE(
students_t_distribution<RealType>::invert_probability_with_respect_to_degrees_of_freedom(
static_cast<RealType>(-0.559429644),
static_cast<RealType>(0.3)),
static_cast<RealType>(5),
tol_inv_df);
BOOST_CHECK_CLOSE(
students_t_distribution<RealType>::invert_probability_with_respect_to_degrees_of_freedom(
static_cast<RealType>(1.475884049),
static_cast<RealType>(0.9)),
static_cast<RealType>(5),
tol_inv_df);
BOOST_CHECK_CLOSE(
students_t_distribution<RealType>::invert_probability_with_respect_to_degrees_of_freedom(
static_cast<RealType>(-1.475884049),
static_cast<RealType>(0.1)),
static_cast<RealType>(5),
tol_inv_df);
BOOST_CHECK_CLOSE(
students_t_distribution<RealType>::invert_probability_with_respect_to_degrees_of_freedom(
static_cast<RealType>(-5.2410429995425),
static_cast<RealType>(0.00001)),
static_cast<RealType>(25),
tol_inv_df);
BOOST_CHECK_CLOSE(
students_t_distribution<RealType>::invert_probability_with_respect_to_degrees_of_freedom(
static_cast<RealType>(6.96455673428326),
static_cast<RealType>(0.99)),
static_cast<RealType>(2),
tol_inv_df);
BOOST_CHECK_CLOSE(
students_t_distribution<RealType>::invert_probability_with_respect_to_degrees_of_freedom(
static_cast<RealType>(2),
static_cast<RealType>(0.610822886098362)),
static_cast<RealType>(0.1),
tol_inv_df);
BOOST_CHECK_CLOSE(
students_t_distribution<RealType>::invert_probability_with_respect_to_degrees_of_freedom(
static_cast<RealType>(2),
static_cast<RealType>(0.777242554908434)),
static_cast<RealType>(0.5),
tol_inv_df);
BOOST_CHECK_CLOSE(
students_t_distribution<RealType>::invert_probability_with_respect_to_degrees_of_freedom(
static_cast<RealType>(2),
static_cast<RealType>(0.822925875908677)),
static_cast<RealType>(0.75),
tol_inv_df);
BOOST_CHECK_CLOSE(
students_t_distribution<RealType>::invert_probability_with_respect_to_degrees_of_freedom(
static_cast<RealType>(2),
static_cast<RealType>(0.977114826753374)),
static_cast<RealType>(1000),
tol_inv_df);
BOOST_CHECK_CLOSE(
students_t_distribution<RealType>::invert_probability_with_respect_to_degrees_of_freedom(
static_cast<RealType>(1),
static_cast<RealType>(0.8413326478347855)),
static_cast<RealType>(1e4),
tol_inv_df * static_cast<RealType>(5)); // Looser tolerance for ill-conditioned problem with very large df
// Small df edge cases where approximation becomes problematic: would need different values for float
// For now just test double and long double to have test overage of these code paths
if (!std::is_same<RealType, float>::value)
{
// Small df case 1: Edgeworth expansion breaks down for small degrees of freedom, use fallback
BOOST_CHECK_CLOSE(
students_t_distribution<RealType>::invert_probability_with_respect_to_degrees_of_freedom(
static_cast<RealType>(1e20),
static_cast<RealType>(0.5244796002843015)),
static_cast<RealType>(1e-3),
tol_inv_df);
// Small df case 2: Edgeworth expansion succeeds but is inaccurate, use fallback
BOOST_CHECK_CLOSE(
students_t_distribution<RealType>::invert_probability_with_respect_to_degrees_of_freedom(
static_cast<RealType>(2.0),
static_cast<RealType>(0.500000000644961)),
static_cast<RealType>(1e-10),
tol_inv_df);
}
// Domain error: p outside (0,1)
#ifndef BOOST_NO_EXCEPTIONS
BOOST_MATH_CHECK_THROW(
students_t_distribution<RealType>::invert_probability_with_respect_to_degrees_of_freedom(
static_cast<RealType>(2),
static_cast<RealType>(-0.1)),
std::domain_error);
BOOST_MATH_CHECK_THROW(
students_t_distribution<RealType>::invert_probability_with_respect_to_degrees_of_freedom(
static_cast<RealType>(2),
static_cast<RealType>(1.1)),
std::domain_error);
// x == 0, p != 0.5: no df can satisfy CDF(0; df) == p -> domain error
BOOST_MATH_CHECK_THROW(
students_t_distribution<RealType>::invert_probability_with_respect_to_degrees_of_freedom(
static_cast<RealType>(0),
static_cast<RealType>(0.3)),
std::domain_error);
// x == 0, p == 0.5: CDF(0; df) == 0.5 for all df -> overflow (infinite solutions)
BOOST_MATH_CHECK_THROW(
students_t_distribution<RealType>::invert_probability_with_respect_to_degrees_of_freedom(
static_cast<RealType>(0),
static_cast<RealType>(0.5)),
std::overflow_error);
#endif

// Test for large degrees of freedom when should be same as normal.
RealType inf = std::numeric_limits<RealType>::infinity();
RealType nan = std::numeric_limits<RealType>::quiet_NaN();
Expand Down
Loading