hi Ian
first of all thanks for your help and suggestions. your script work very well
So.... I tried to integrate your script into a more big one, in order to reduce and simply DB.
here the entire work
Names Default To Here( 1 );
clear globals();
// code to choose among all opened tables which one will be selected for the analysis
OpenDT = List();
For( i = 1, i <= N Table(), i++,
Insert Into( OpenDT, Data Table( i ) )
);
dlgDT = Dialog( "Select Data Table for analysis", VList( selectDT = List Box( OpenDT ) ), HList( Button( "OK" ), Button( "Cancel" ) ) );
Show( dlgDT );
If( dlgDT["button"] == 1,
Remove From( dlgDT, N Items( dlgDT ) );
Eval List( dlgDT );
);
Current Data Table( selectDT[1] );
// -----------------------------------------GUI -----------------------------------------//
cd = Column Dialog(
Title( " Data Preprocessing " ),
List= ColList( " Select Columns >> ", MinCol( 1 ) ),
HList( " % of Empty elements in the column -> (0-100) ", mypercent1 = EditNumber( 50 ) ),
HList( " % of Stagnant elements in the column -> (0-100) ", mypercent = EditNumber( 80 ) ),
VList(
numeric = Check Box( "Columns Numeric - Impute missing cells with the mean of the others", 1 ),
character = Check Box( "Columns character - Impute missing cells with N/A", 1 ),
),
);
//declaration of variables
impute_num_col = cd["numeric"]; // impute numerics
impute_char = cd["character"]; // impute characters
percent = cd["mypercent"];
percent1 = cd["mypercent1"];
mylist = cd["List"];
dt= selectDT[1];
delcols = {};
numRows = nrows(dt);
for(i=1; maxi=Nitems(mylist), i<=maxi, i++,
if(nrows(loc(aslist(column(dt,mylist[i])<<getasmatrix),mode(column(dt,mylist[i])<<getasmatrix))) >= numRows * percent/100,
insert into(delcols, myList[i]);
,
if(Col N Missing(column(dt,mylist[i])) >= numRows * percent1/100,
insert into(delcols, myList[i]);
);
);
);
nitems = N Items( delcols );
colname = {};
colnamelist = {};
For( p = 1, p <= nitems, p++,
colname = Column( delcols[p] ) << Get Name;
Insert Into( colnamelist, colname(p) );
);
dt << delete columns (colnamelist);
//OK! Columns Numeric - Impute missing cells with the mean of the others
If( impute_num_col == 1,
cc = dt << Get Column Names( Continuous );
ncols = N Items( cc );
For( i = 1, i <= ncols, i++,
values = Column( dt, cc[i] ) << get values( 1 );
values[Loc( Is Missing( values ) )] = Mean( values );
Column( dt, cc[i] ) << setvalues( values, cc[i] );
);
);
// Columns character - Impute missing cells with NA
If( impute_char == 1,
FILL = dt << get column names( Character );
For( i = 1, i <= N Items( FILL ), i++,
FILL[i][dt << get rows where( Is Missing( FILL[i][] ) )] = "NA";
);
);
but the problem is that the last part of script (#Columns character - Impute missing cells with NA#)
don't know why... doesn't work....
very strange because the same code works if isn't integrate in the job above.
dt = Current Data Table();
FILL = dt << get column names( Character );
For( i = 1, i <= N Items( FILL ), i++,
FILL[i][dt << get rows where( Is Missing( FILL[i][] ) )] = "NA";
);
do you have idea why it is happening... ?
thanks for collaboration
Gianpaolo Polsinelli