If anyone's interested, the below Python code looks like it will exactly reproduce what JMP is doing in terms of calculating the confidence interval around a regression. This code block can be pasted into a Jupyter notebook directly. Besides calculating the Y- coordinates of the confidence interval for every X in the linear regression it does also create a plot, calculates the regression coefficient, intercept, associated uncertainties and tabulates the results.
Don't believe I wrote this myself, but still useful for learning.
import numpy as np
from scipy.optimize import curve_fit
import matplotlib.pyplot as plt
# the data as array
x_coordinate = np.array([0, 1, 2, 3])
y_coordinate = np.array([0.0, 0.5, 1.2, 5.1])
# linear model
func = lambda x, a, b: a * x + b
# model fit
best_fit_ab, covar = curve_fit(func, x_coordinate, y_coordinate)
sigma_ab = np.sqrt(np.diagonal(covar))
# extract fitted parameters and uncertainties
a, b = best_fit_ab
a_err, b_err = sigma_ab
text_res = f"Best fit parameters:\na = {a:.10f} ± {a_err:.10f}\nb = {b:.10f} ± {b_err:.10f}"
print(text_res)
# calculate the standard error of the regression
residuals = y_coordinate - func(x_coordinate, a, b)
s_err = np.sqrt(np.sum(residuals**2) / (len(x_coordinate) - 2))
# define a function to calculate the confidence intervals
def predict_ci(x, func, popt, pcov, s_err, alpha=0.05): # adjust alpha here
from scipy.stats import t
df = len(x_coordinate) - 2
t_val = t.ppf(1 - alpha / 2, df) # t-value for the given confidence level and degrees of freedom
y = func(x, *popt)
ci = t_val * s_err * np.sqrt(1/len(x_coordinate) + (x - np.mean(x_coordinate))**2 / np.sum((x_coordinate - np.mean(x_coordinate))**2))
return y - ci, y + ci
# generate points for high-resolution x-axis
hires_x = np.linspace(x_coordinate.min(), x_coordinate.max(), 100)
# calculate confidence intervals
ci_lower_orig, ci_upper_orig = predict_ci(x_coordinate, func, best_fit_ab, covar, s_err)
# tabulate the original data and confidence intervals
table_orig = np.column_stack((x_coordinate, y_coordinate, ci_lower_orig, ci_upper_orig))
print("\nTabulated values (original data):")
print("X Coordinate\tY Coordinate\t\tCI Lower\t\tCI Upper")
for row in table_orig:
print(f"{row[0]:.2f}\t\t{row[1]:.10f}\t\t{row[2]:.10f}\t{row[3]:.10f}") # adjust decimals for tabulation
# plotting the data
plt.scatter(x_coordinate, y_coordinate, label='Data', color='blue')
# plotting the fitted model
plt.plot(hires_x, func(hires_x, *best_fit_ab), 'black', label='Fitted model')
# plotting the confidence intervals
ci_lower, ci_upper = predict_ci(hires_x, func, best_fit_ab, covar, s_err)
plt.fill_between(hires_x, ci_lower, ci_upper, color='black', alpha=0.15, label='Confidence interval')
# adding text with parameters
plt.text(x_coordinate.min() + 0.5, y_coordinate.max() - 0.5, text_res)
# customize plot
plt.xlabel('X Coordinate')
plt.ylabel('Y Coordinate')
plt.title('Y Coordinate vs. X Coordinate')
plt.legend()
plt.show()