/* PseudoFailureTime.jsl 24Mar2009 Copyright (c) 2008 by SAS Institute Inc., Cary, NC 27513, USA. All rights reserved. Note: please read the disclaimer at the end of this script. Purpose This script takes a repeated measures data table (a table with multiple rows per unit) and turns it into a data table with 1 row per unit and a Pseudo Failure time. The original data table must contain: 1) A column that identifies each unit, 2) a variable that measures time (can be a usage measure such as miles driven), and 3) a response column. Author Melinda Thielbar (SAS Institute) Contact Melinda.Thielbar@jmp.com Usage Simply run this script by any one of these methods: Edit > Run Script Control-R Click "Run Script" button in tool bar Future Improvement Ideas None at this time. */ //Read.ME information for this script. //*******************General information //This script takes a repeated measures data table (a table with multiple rows per unit) //and turns it into a data table with 1 row per unit and a Pseudo Failure time. //The original data table must contain: 1) A column that identifies each unit, //2) a variable that measures time (can be a usage measure such as miles driven), and 3) a //response column. //*****************Initial Dialog //The initial dialog allows you to select the response, time measure //(can be time or a usage measure such as miles driven), and a unit identifier. //You may also specify 1 group-by variable. If a //group by variable is specified, the user can select a different transformation //for each level of the grouping variable. //The radio box at the lower left allows the user to specify whether the plots //should display a regression line or connect each point in the series for each //unit.
//This affects the display graphs but does not alter how the internal calculations //are performed. //************************Select Transformation window //After the initial variables are specified, the script opens a window displaying the //relationship between the time variable and the response. Transformations for time //and response can be selected from the drop-down lists at the side of the //plots. The plots will update when transformations are //selected to show the new relationship. //If a group by variable is specified, there will be a separate plot for each //level of the grouping variable. //When the user has found a transformation(s) that causes a linear relationship for //transformed response vs transformed time, he/she can select //"Create Pseudo Failure Time Data" //to create the new data table. //For more information about the Select Transformation window, select the Help //button in the dialog box. //**********************Create Data Table Dialog //This dialog box allows you to enter the failure threshold, //specify additional covariates (optional), and specify a column in the data //table that controls the size of the pseudo failure time. //The resulting data table will have ONE ROW per Unit ID. //Details //Covariates for Failure Model (optional): // An optional list of covariates to be included in the failure // time model. These columns are copied from the original data table to the // Pseudo failure time table. They do not affect the calculation. // If there are multiple measures for each covariate, the // LAST VALUE is included in the final table. At this time, the script does // not support time-dependent covariates. //Maximum Predicted Time (optional) // An optional column containing maximum predicted times. // If the predicted failure time from the linear models is greater // than the value in this column, the pseudo failure time is the // maximum predicted time from this column.
//Censoring Column (optional) // A column in the data table that has a 0 for observations that failed during testing // and a 1 otherwise. If this column is specified, any unit that has a 0 for this column // will be considered a "hard failure", and the pseudo failure time will be the time of the // last measurement taken on this unit. //Failure Threshold (required) // The value of the response for which the unit would have been considered Failed. //***************Details on Inputs, outputs, and internal calculations //Inputs: // * Data table, as described in assumptions // --Will use active table or script will prompt for open // * Failure Level: damage level at which the component is considered "failed" // --User will type into a labeled box // * Censor Time (optional): if predicted time exceeds this time, the component // should be considered "censored" // --The data table will have a column with the maximum predicted time // the user will allow for each unit // * Transformation of time and/or response // --User will select from a list // --User will be shown a plot of transformed and predicted values // and allowed to change selection //Outputs: // * Interactive plot. Allows the user to visually fit // a linear transformation // * Data table containing the following: // --a row for each component // --"Predicted Time": a numeric column containing the predicted failure time // --a numeric column containing the original failure time // --"Pseudo Failure Time" a numeric column containing the pseudo failure time. // This is the time predicted by the linear model or, // if censoring variables are specified, // the appropriate time of failure. This variable is based on a formula that // assigns either the original // time or the predicted time based on the user's specifications. // --"Censored" A 0,1 variable indicating if the unit is censored (1) or not // censored (0).
// This is the variable that should be specified as the // censoring variable in the Life Distribution platform or other survival // model. // --"Estimate Type": an ordinal column indicating if the row is censored in // one of the following ways // --failed during testing: 0 // (In this case, the modeling time is set to the last time that // unit was measured). // --did not fail during testing, and predicted time makes sense: 1 // (pseudo failure time = predicted time) // --predicted censoring time is longer than the user's specified time: 3 // (in this case, the modeling time is set to the maximum time // specified by the user) // --there was only one row for the unit--prediction was impossible: 2 // (in this case, the modeling time is set to the last time the unit was measured). // --predicted time is negative: 4 // --in some cases, the calculation for predicted time will result in a time // that is too large for JSL to handle. In that case, Pseudo failure is set to // the maximum time or to the last time the unit was measured if // there is no maximum time column // --covariates from the original data table. There is a dialog that allows the user // to select covariates that will be used in a later analysis. // * Time-dependent covariates are NOT SUPPORTED. The last values measured for each // component appear in the data table. //Calculations: // * Linear transformation of the input values // * Predicted failure time using the linear transformation // --the model calculates a separate line for each unit // --the script performs the calculation using JSL's matrix language //Assumptions: // * The data table is in the following form: // --1 row for each measurement on each item (repeated measures) // --Contains a time column specifying when each measure was taken // --Contains a censoring column (0,1) indicating if the component // failed during testing.
// --If the last measure for the unit has censoring=0, // the new data table will use the failure time given in the table and not the // predicted failure time. // * There is an appropriate linear transformation that works for // all rows or all rows within a by-group (i.e. we do not have situation // where some rows require a // log transformation and others require a power transformation) //Platforms Used: // * The Bivariate platform is used to create the plots that allow the user to select // the appropriate transformation. Fit Each Value is used to allow the user to see the // if the fit is approximately linear. // /******Associative Map for Transformations***********/ clear globals(); TransformationMap = Associative Array({ {1,"No Transformation"}, {2,"Inverse"}, {3, "Log"}, {4,"Exponential"}, {5,"Squared"}, {6,"Square Root"}, {7,"Squared Inverse"}, {8,"Tanh"}, {9,"Coth"}, {10,"Csch"} }); /******End Associative Map for Transformations***********/ /****************Create DT Expression***************************/ CreateDTExpression=expr( //Create a background data table with the needed variables //Stops name collisions and other problems with the user's table dt = userTable << Subset(Copy formula( 0 ), columns(eval(initialList)) ); unitColumn = init_dlg["id_col"][1]; XD = Design(::unitColumn< 1 & j < num_rows, if (lastRowVal != ::unitColumn, SortedTable << select rows(j-1) ), j==num_rows, SortedTable << select rows(j) ); lastRowVal= ::unitColumn; ); //Create the output table by subsetting on the //selected rows OutputTable = SortedTable << Subset(Copy formula( 0 ), columns(eval(AllVariables)), Output Table("Pseudo Failure Time Data")); close(sortedTable,nosave); numOutputRows = nrows(outputtable); //Create a column vector to hold the predicted //values of time. 
PredictedHolderVector = transpose(index(1,numOutputRows) ); if(CreateTableDialog["predictionType"]==2, PredictedLowVector = transpose(index(1,numOutputRows) ); PredictedHighVector = transpose(index(1,numOutputRows) ); ); //Add a column of 1's for least squares dt << newcolumn("ColumnOfOnes",formula(1)); //Create a separate OLS model for each unitID. //Have to go to dt (which has the transformed values), //use Design function to get a design matrix for Unit ID Xcolnames={"ColumnOfOnes","timetransvar"}; nXcols=2; current data table(dt); for (ilevel=1, ilevel<=numunits, ilevel++, rows = loc(xd[0,ilevel]); row1=rows[1]; if (nitems(ResponseTransformCurrent) > 1, transformNumber=loc(GroupD[row1,0]), transformNumber={1} ); TransformedFailThresh=TransformValue(ResponseTransformCurrent[transformNumber[1]], FailThreshVal); //gets the threshold value given in the //dialog and transforms it into the appropriate value for(icol=1,icol<=nXcols,icol++, datac = (column(Xcolnames[icol])< 1, //Get a vector of transformed response values yvector= (column("ResponseTransVar")< Max Time","Predict <0"})); //The conditions needed for the censoring column //depend on the columns the user specifies. //The formula for the censoring column are built based on user's input. 
if (CreateTableDialog["predictionType"]==1, eval(parse("newcensoredcolumn << set formula(if("|| if (noCensored != 1, ":OldCensored==0, 0,", " ")|| "ismissing(:PredictedTime) | :predictedTime==-1, 2, :predictedTime<0,4"|| if (timeThresholdNeeded==0," ", timeThresholdNeeded==1,", :predictedTime > "||char(timethreshval)||", 3", timeThresholdNeeded==2,", :predictedTime > :TimeThreshold, 3" ," ")|| ",1,1));" )), eval(parse("newcensoredcolumn << set formula( if ("|| if (noCensored != 1, ":OldCensored==0, 0,", " ")|| "ismissing(:PredictedTimeUpperLimit)|ismissing(:PredictedTimeLowerLimit), 2, :predictedTime<0,4"|| if (timeThresholdNeeded==0," ", timeThresholdNeeded==1,", :PredictedTimeUpperLimit > "||char(timethreshval)||", 3", timeThresholdNeeded==2,", :PredictedTimeUpperLimit > :TimeThreshold, 3" ," ")|| ",1,1));" ))); //Need a 0,1 censored column for use in Life Distribution outputtable << NewColumn("Censored",formula(match(:EstimateType, 0,0,1,0,1), )); newcensoredcolumn < "||char(timethreshval)||","||char(timethreshval), timeThresholdNeeded==2,", :predictedTime > :TimeThreshold, :TimeThreshold", " ")|| ",1,:PredictedTime));" )), NewYlow=outputtable << New Column("PseudoFailureLowerLimit"); eval(parse("Newylow << set formula(if ("|| if (noCensored != 1, ":OldCensored==0,"||char(init_dlg["time_col"][1])||","," ")|| "ismissing(:PredictedTimeLowerLimit) | :predictedTimeLowerLimit==-1, "|| char(init_dlg["time_col"][1])|| ",:predictedTimeLowerLimit<0,0"|| if (timeThresholdNeeded==1,", :predictedTimeLowerLimit > "||char(timethreshval)||", "|| char(timethreshval), timeThresholdNeeded==2,", :predictedTimeLowerLimit > :TimeThreshold, :TimeThreshold", " ")|| ",1,:PredictedTimeLowerLimit));" )); NewYhigh=outputtable << New Column("PseudoFailureUpperLimit"); eval(parse("Newyhigh << set formula(if ("|| if (noCensored != 1, ":OldCensored==0,"||char(init_dlg["time_col"][1])||", "," ")|| "ismissing(:PredictedTimeUpperLimit) | :predictedTimeUpperLimit==-1, "|| 
char(init_dlg["time_col"][1])|| ",:predictedTimeUpperLimit<0,0"|| if (timeThresholdNeeded==1,", :predictedTimeLowerLimit > "||char(timethreshval)||", "|| "."|| ", :predictedTimeUpperLimit > "||char(timethreshval)||", "|| char(timethreshval), timeThresholdNeeded==2,", :predictedTimeLowerLimit > :TimeThreshold, :TimeThreshold"||", :predictedTimeUpperLimit > :TimeThreshold, :TimeThreshold", " ")|| ",1,:PredictedTimeUpperLimit));" )); ); //**********Set properties for output table //Create a life distribution for resulting table //OutputTable <