/****************************************************************************************************************************************************************/
/****************************************************************************************************************************************************************/
/* */
/* SAS INSTITUTE INC. IS PROVIDING YOU WITH THE COMPUTER SOFTWARE CODE INCLUDED WITH THIS AGREEMENT ("CODE") ON AN "AS IS" BASIS, */
/* AND AUTHORIZES YOU TO USE THE CODE SUBJECT TO THE TERMS HEREOF. BY USING THE CODE, YOU AGREE TO THESE TERMS. */
/* YOUR USE OF THE CODE IS AT YOUR OWN RISK. SAS INSTITUTE INC. MAKES NO REPRESENTATION OR WARRANTY, EXPRESS OR IMPLIED, */
/* INCLUDING, BUT NOT LIMITED TO, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT AND TITLE, WITH RESPECT TO THE CODE. */
/* */
/* The Code is intended to be used solely as part of a product ("Software") you currently have licensed from SAS Institute Inc. */
/* or one of its subsidiaries or authorized agents ("SAS"). The Code is designed to either correct an error in the Software or */
/* to add functionality to the Software, but has not necessarily been tested. Accordingly, SAS makes no representation or warranty */
/* that the Code will operate error-free. SAS is under no obligation to maintain or support the Code. */
/* */
/* Neither SAS nor its licensors shall be liable to you or any third party for any general, special, direct, indirect, consequential, */
/* incidental or other damages whatsoever arising out of or related to your use or inability to use the Code, even if SAS has been advised */ 
/* of the possibility of such damages. */
/* */
/* Except as otherwise provided above, the Code is governed by the same agreement that governs the Software. If you do not have an */
/* existing agreement with SAS governing the Software, you may not use the Code. */
/* */
/****************************************************************************************************************************************************************/
/****************************************************************************************************************************************************************/

/************************************************************************************************************************************
| |
| Written By: Stan Koprowski |
| Date: April 10, 2019 |
| Version: 1.0 |
| |
| Required Header Items: Title, Vector, Tuples, Data |
| |
| Header Format, i.e., structure of the header attribute |
| << Attribute Name >> | |
| << Vector Number >> , << Numeric Value >> |
| << "String value" >> |
| |
| Where, |
| << Attribute_Name >> is a symbol/token of (usually) 32 or fewer characters |
| << Vector Number >> is 0 if entire file |
| << Numeric Value >> is 0 unless otherwise specified |
| << "String Value" >> is \!"\!" (double quote with no space between) if not used |
| |
| Optional Header Items: Comment, Label, Size, Truelength, Units, DispalyUnits | |
| |
| DATA: |
| << Data Type Indicator >>, << Numeric Value >> |
| << "String value" >> |
| |
| Where, |
| << Data Type Indicator >> is one of three values -1, 0, 1 |
| |
| -1 is a special data value |
| << "String Value" >> = BOT or EOD |
| |
| 0 numeric signed decimal number |
| << Numeric Value >> = is numeric data |
| << "String Value" >> = is one of the value indicators (V, NA, ERROR, TRUE, FALSE) |
| |
| 1 string data value | | 
| << Numeric Value >> = 0 | 
| << "String Value" >> = string data value | |
| |
| | 
|************************************************************************************************************************************/

Names Default To Here( 1 );

errorhdrchkExpr = Expr(
// regex_hdrs = "(TABLE|VECTORS|TUPLES|DATA)";
 matrix_hdrs = { "TABLE", "VECTORS", "TUPLES", "DATA" };
// rqd_hdrs = { TABLE, VECTORS, TUPLES, DATA };
 
// Option 1: Find match using Regex with check for required headers
/* thematch = Regex( lines, regex_hdrs );
 If( IsMissing(thematch),
 errnum_desc = "Error: A required header is missing";
 errhdrnum = 0;
 Break();
 ,
 errhdrnum = 1
 );*/
 
// Option 2: Find specific match using Pat Match with check for required headers 
 err_List = {};
 For( e = 1, e <= Nitems( matrix_hdrs ), e++,
 thematch = patMatch( txtfile, matrix_hdrs[e] ); 
 If( thematch == 0,
 errnum_desc = Concat("Error ", char( e ), ": ", "A required header ", matrix_hdrs[e], " is missing.");
 Insert Into( err_List, errnum_desc);
 errhdrnum = 0;
 Break();
 ,
 errhdrnum = 1
 ) 
 );
 
 // Option 3: Find specific match using LOC with check for required headers
/* err_List = {};
 For ( e = 1, e <= Nitems( rqd_hdrs ), e++,
 If( Loc( {lines}, rqd_hdrs[e] ) != [1],
 errnum_desc = Concat("Error ", char( e ), ": ", "A required header ", rqd_hdrs[e], " is missing");
 Insert Into( err_List, errnum_desc);
 errhdrnum = 0;
 Break();
 ,
 errhdrnum = 1
 ) 
 );*/
 
 // Option 4: Find specific match using Contains with check for required headers
// err_List = {};
// For ( e = 1, e <= Nitems( rqd_hdrs ), e++,
// If( Contains( txtfile, rqd_hdrs[e] ) == 0,
// errnum_desc = Concat("Error ", char( e ), ": ", "A required header ", rqd_hdrs[e], " is missing");
// Insert Into( err_List, errnum_desc);
// errhdrnum = 0;
// Break();
// ,
// errhdrnum = 1
// ) 
// );

);

errmsgExpr = Expr(
 Caption( {100, 200}, errnum_desc, Font( "Arial Black" ), Font Size( 16 ), Text Color( "Red" ), Back Color( "yellow" ), Spoken( 1 ) );
);

getdataExpr = Expr(
 For( i = 1, i <= Nitems( lines ), i++, 
 If( Contains( Trim(lines[i] ), "TABLE" ),
 Table_Name = Trim(Word( 1, lines[i+2] ));
 if( Table_Name != "\!"\!"", dt << Set Name( char(Table_Name ) ));
 //Check for Excel file; often invert vectors (columns) and tuples (rows)
 If( Parse( Table_Name ) == "EXCEL", 
 excelflag = 1 
 ) 
 , 
 Contains( Trim( lines[i] ), "VECTORS" ), 
 If( excelflag == 1,
 tot_rows = Num( Word( 2, Trim( lines[i+1] ), "," ));
 dt << Add Rows( tot_rows )
 ,
 cols = Num( Word( 2, Trim( lines[i+1] ), "," ));
 If( labelflag == 0,
 For( c=1, c <= cols, c++,
 dt << New Column << Set Data Type( "Character" );
 labelflag = 1
 )
 )
 )
 , 
 Contains( Trim( lines[i] ), "TUPLES" ), 
 If( excelflag == 1,
 cols = Num( Word( 2, Trim( lines[i+1] ), "," ));
 If(labelflag == 0,
 For(c=2, c <= cols, c++,
 dt << New Column << Set Data Type( "Character" );
 labelflag = 1
 )
 )
 ,
 tot_rows = Num( Word( 2, Trim( lines[i+1] ), "," ));
 dt << Add Rows(tot_rows)
 )
 ,
 // Check for Label (optional) header
 Contains( Trim( lines[i]), "LABEL" ), 
 If(labelflag == 0,
 col_name = Word( 1, Trim( lines[i+2] ), "\!"" );
 dt << New Column( col_name ) << Set Data Type( "Character" );
 labelflag = 1
 )
 ,
 // Check for Size (optional) header
 Contains( Trim( lines[i] ), "SIZE" ), 
 size_txt = Word( 1, Trim( lines[i+2] ), "\!"" );
 sizeflag = 1
 ,
 // Check for True Length (optional) header
 Contains( Trim( lines[i] ), "TRUELENGTH" ), 
 trulngth_txt = Word( 1, Trim( lines[i+2] ), "\!"" );
 truelengthflag = 1
 ,
 // Check for Comment (optional) header
 Contains( Trim( lines[i] ), "COMMENT" ), 
 comment_txt = Trim( Word( 1, lines[i+2], "\!"" ));
 commentflag = 1
 ,
 // Check for Units (optional) header
 Contains( Trim( lines[i] ), "UNITS" ), 
 units_txt = Word( 1, Trim( lines[i+2] ), "\!"" );
 unitsflag = 1
 ,
 // Check for Display Units (optional) header
 Contains( Trim( lines[i] ), "DISPLAYUNITS" ), 
 displayunits_txt = Word( 1, Trim( lines[i+2] ), "\!"" );
 dispalyUnitsflag = 1
 ,
 Contains( Trim( lines[i] ), "DATA" ),
 cnt = 0;
 rn = 0;
 numrows = NItems( lines );
 For( cl = i+3, cl <= numrows, cl++, 
 cnt = cl;
 If( 
 Num( Word( 1, Trim( lines[cl] ), "," )) == -1 & Word( 1, Trim( lines[cl+1] )) == "EOD",
 Break()
 ,
 Num( Trim( Word( 1, lines[cl], "," ))) == -1 & Word( 1, Trim( lines[cl+1])) == "BOT",
 rn = rn + 1;
 For(c = 1, c <= cols, c++,
 cnt= cnt+2;
 chk_val = Num( Word( 1, Trim( lines[cnt] ), "," ));
 If( excelflag == 1,
 num_val = Word( 1, Word( 2, Trim( lines[cnt] ), "," ),"\!"\!"")
 ,
 num_val = Num( Word( 2, Trim( lines[cnt] ), "," ));
 );
 If( excelflag == 1,
 string_val = Word( 1, Trim( lines[cnt+1]),"\!"\!"")
 ,
 string_val = Word( 1, Trim( lines[cnt+1] ), "\!"" )
 );
 if(rn == 1 & excelflag != 1,
 column(dt, c)<< Set Name( string_val);
 delflag = 1;
 );
 If( chk_val == 0 & excelflag != 1,
 column(dt, c) << Set Data Type("Numeric") << Format( "Best", "Use Thousands Separator") << Set Modeling Type("Continuous");
 column(dt, c)[rn] = num_val;
 ,
 chk_val == 1 & excelflag != 1,
 column(dt, c) << Set Data Type("Character") << Set Modeling Type("Nominal");
 column(dt, c)[rn] = string_val;
 ,
 chk_val == 0 & excelflag == 1,
 column(dt, c) << Set Data Type("Character") << Set Modeling Type("Nominal");
 column(dt, c)[rn] = num_val;
 ,
 chk_val == 1 & excelflag == 1,
 column(dt, c) << Set Data Type("Character") << Set Modeling Type("Nominal");
 column(dt, c)[rn] = string_val;
 ,
 Break() 
 ); 
 ); 
 );
 );
 );
 ); 
);

//Remove first row (column names) from data table
CleanRowsExpr = Expr(
 If(delflag == 1,
 dt << Select Rows([1]);
 dt << Delete Rows
 );
);

main_menuExpr = Expr(
 thefile = Pick File( "Select Data Interchange File (DIF)", "$Desktop", {"DIF Files|dif", "All Files|*"}, 1, 0, "newJmpFile.jmp" );
// txtfile = Load Text File( thefile, Charset("ascii"));
 txtfile = Load Text File( thefile);
 lines = words( txtfile, "\!N\!r" );
 commentflag = sizeflag = truelengthflag = unitsflag = dispalyUnitsflag = labelflag = delflag = excelflag = 0;
 Eval( errorhdrchkExpr );
 If( errhdrnum == 1,
 dt = New Table();
 Eval( getdataExpr );
 Eval( CleanRowsExpr )
 ,
 Eval( errmsgExpr ) 
 );
);

// Main Menu
Clear Log();
Eval( main_menuExpr );