/* Bin annual_revenue into 10 bins and write the result to binned_data.
   PROC HPBIN has no OUTPUT statement: the output data set is named with
   the OUTPUT= option on the PROC statement itself. */
proc hpbin data=commercial_data output=binned_data numbin=10;
    input annual_revenue;
    /* NOTE(review): HPBIN documents TARGET as the variable used for the
       WOE / information-value calculation; no WOE option is requested
       here, so this statement presumably has no effect on the bins.
       Confirm whether WOE + BINS_META= were intended. */
    target default_flag / level=nominal;
run;
/* Assign every observation to one of 20 rank groups of annual_revenue
   (GROUPS=20 yields group values 0 through 19) and store the group in
   revenue_decile on the output data set ranked.
   NOTE(review): the variable is named "decile" but 20 groups are
   requested, not 10 -- confirm which is intended before relying on the
   name downstream. */
proc rank
        data   = commercial_data
        groups = 20
        out    = ranked;
    var   annual_revenue;
    ranks revenue_decile;
run;
/* Build bin_analysis: one row per revenue_decile group from ranked, with
   the observation count, the sum and mean of default_flag, and the
   revenue range covered by the group.
   Assumes default_flag is coded 0/1 so that its sum is a default count
   and its mean is a default rate -- TODO confirm. */
proc sql;
    create table bin_analysis as
        select
            revenue_decile,
            count(*)            as n_obs,
            sum(default_flag)   as n_defaults,
            avg(default_flag)   as default_rate,
            min(annual_revenue) as min_revenue,
            max(annual_revenue) as max_revenue
        from ranked
        group by revenue_decile
        /* Explicit ordering: the next step compares consecutive rows. */
        order by revenue_decile;
quit;
/* Monotonicity check: for every row of bin_analysis after the first,
   set monotonic_flag to 1 when the default rate is no higher than in the
   previous row, and to 0 otherwise. The first row has no predecessor, so
   its monotonic_flag stays missing. */
data monotonic_check;
    set bin_analysis;

    /* LAG is called on every row, outside the condition, so its queue
       always holds the immediately preceding row's default_rate. */
    lag_default_rate = lag(default_rate);

    /* A SAS comparison evaluates to 1 (true) or 0 (false). */
    if _n_ > 1 then
        monotonic_flag = (default_rate <= lag_default_rate);
run;
/* Chi-square test of association between revenue_bin and default_flag,
   printing expected cell frequencies alongside the observed ones.
   NOTE(review): final_bins and revenue_bin are not created in the steps
   visible here -- presumably built elsewhere; verify they exist. */
proc freq data = final_bins;
    tables revenue_bin * default_flag
        / expected chisq;
run;
/* Cochran-Armitage test for a trend in default_flag across the ordered
   levels of revenue_bin (requested with the TREND option).
   NOTE(review): final_bins and revenue_bin are not created in the steps
   visible here -- presumably built elsewhere; verify they exist. */
proc freq data = final_bins;
    tables revenue_bin * default_flag
        / trend;
run;