This JSL script will create a JMP data table from a Data Interchange Format or DIF file. The file format originated from Visicalc in the 1980 as a mechanism to transfer data between spreadsheet programs.
In the example code, I provide 4 mechanisms to check for the required headers with only one active at a time.
For additional information on Regex see @Craige_Hales' blog post and for insights into the Pat Match option see the JSL Cookbook.
By default, I have implemented Option 2: Pat Match for error checking.
Required Header Items:
Header Format, i.e., structure of the header attribute
<< Attribute Name >>
<< Vector Number >> , << Numeric Value >>
<< "String value" >>
Where, |
<< Attribute_Name >> is a symbol/token of (usually) 32 or fewer characters
<< Vector Number >> is 0 if entire file
<< Numeric Value >> is 0 unless otherwise specified
<< "String Value" >> is \!"\!" (double quote with no space between) if not used
Optional Header Items: Please note these optional headers were not tested during the development of the add-in
DATA:
<< Data Type Indicator >>, << Numeric Value >>
<< "String value" >>
Where,
<< Data Type Indicator >> is one of three values -1, 0, 1
-1 --is a special data value
<< "String Value" >> = BOT or EOD
0 --numeric signed decimal number
<< Numeric Value >> = is numeric data
<< "String Value" >> = is one of the value indicators (V, NA, ERROR, TRUE, FALSE)
1 --string data value
<< Numeric Value >> = 0
<< "String Value" >> = string data value
/****************************************************************************************************************************************************************/
/****************************************************************************************************************************************************************/
/* */
/* SAS INSTITUTE INC. IS PROVIDING YOU WITH THE COMPUTER SOFTWARE CODE INCLUDED WITH THIS AGREEMENT ("CODE") ON AN "AS IS" BASIS, */
/* AND AUTHORIZES YOU TO USE THE CODE SUBJECT TO THE TERMS HEREOF. BY USING THE CODE, YOU AGREE TO THESE TERMS. */
/* YOUR USE OF THE CODE IS AT YOUR OWN RISK. SAS INSTITUTE INC. MAKES NO REPRESENTATION OR WARRANTY, EXPRESS OR IMPLIED, */
/* INCLUDING, BUT NOT LIMITED TO, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT AND TITLE, WITH RESPECT TO THE CODE. */
/* */
/* The Code is intended to be used solely as part of a product ("Software") you currently have licensed from SAS Institute Inc. */
/* or one of its subsidiaries or authorized agents ("SAS"). The Code is designed to either correct an error in the Software or */
/* to add functionality to the Software, but has not necessarily been tested. Accordingly, SAS makes no representation or warranty */
/* that the Code will operate error-free. SAS is under no obligation to maintain or support the Code. */
/* */
/* Neither SAS nor its licensors shall be liable to you or any third party for any general, special, direct, indirect, consequential, */
/* incidental or other damages whatsoever arising out of or related to your use or inability to use the Code, even if SAS has been advised */
/* of the possibility of such damages. */
/* */
/* Except as otherwise provided above, the Code is governed by the same agreement that governs the Software. If you do not have an */
/* existing agreement with SAS governing the Software, you may not use the Code. */
/* */
/****************************************************************************************************************************************************************/
/****************************************************************************************************************************************************************/
/************************************************************************************************************************************
| |
| Written By: Stan Koprowski |
| Date: April 10, 2019 |
| Version: 1.0 |
| |
| Required Header Items: Title, Vector, Tuples, Data |
| |
| Header Format, i.e., structure of the header attribute |
| << Attribute Name >> | |
| << Vector Number >> , << Numeric Value >> |
| << "String value" >> |
| |
| Where, |
| << Attribute_Name >> is a symbol/token of (usually) 32 or fewer characters |
| << Vector Number >> is 0 if entire file |
| << Numeric Value >> is 0 unless otherwise specified |
| << "String Value" >> is \!"\!" (double quote with no space between) if not used |
| |
| Optional Header Items: Comment, Label, Size, Truelength, Units, DispalyUnits | |
| |
| DATA: |
| << Data Type Indicator >>, << Numeric Value >> |
| << "String value" >> |
| |
| Where, |
| << Data Type Indicator >> is one of three values -1, 0, 1 |
| |
| -1 is a special data value |
| << "String Value" >> = BOT or EOD |
| |
| 0 numeric signed decimal number |
| << Numeric Value >> = is numeric data |
| << "String Value" >> = is one of the value indicators (V, NA, ERROR, TRUE, FALSE) |
| |
| 1 string data value | |
| << Numeric Value >> = 0 |
| << "String Value" >> = string data value | |
| |
| |
|************************************************************************************************************************************/
Names Default To Here( 1 );
errorhdrchkExpr = Expr(
// regex_hdrs = "(TABLE|VECTORS|TUPLES|DATA)";
matrix_hdrs = { "TABLE", "VECTORS", "TUPLES", "DATA" };
// rqd_hdrs = { TABLE, VECTORS, TUPLES, DATA };
// Option 1: Find match using Regex with check for required headers
/* thematch = Regex( lines, regex_hdrs );
If( IsMissing(thematch),
errnum_desc = "Error: A required header is missing";
errhdrnum = 0;
Break();
,
errhdrnum = 1
);*/
// Option 2: Find specific match using Pat Match with check for required headers
err_List = {};
For( e = 1, e <= Nitems( matrix_hdrs ), e++,
thematch = patMatch( txtfile, matrix_hdrs[e] );
If( thematch == 0,
errnum_desc = Concat("Error ", char( e ), ": ", "A required header ", matrix_hdrs[e], " is missing.");
Insert Into( err_List, errnum_desc);
errhdrnum = 0;
Break();
,
errhdrnum = 1
)
);
// Option 3: Find specific match using LOC with check for required headers
/* err_List = {};
For ( e = 1, e <= Nitems( rqd_hdrs ), e++,
If( Loc( {lines}, rqd_hdrs[e] ) != [1],
errnum_desc = Concat("Error ", char( e ), ": ", "A required header ", rqd_hdrs[e], " is missing");
Insert Into( err_List, errnum_desc);
errhdrnum = 0;
Break();
,
errhdrnum = 1
)
);*/
// Option 4: Find specific match using Contains with check for required headers
// err_List = {};
// For ( e = 1, e <= Nitems( rqd_hdrs ), e++,
// If( Contains( txtfile, rqd_hdrs[e] ) == 0,
// errnum_desc = Concat("Error ", char( e ), ": ", "A required header ", rqd_hdrs[e], " is missing");
// Insert Into( err_List, errnum_desc);
// errhdrnum = 0;
// Break();
// ,
// errhdrnum = 1
// )
// );
);
errmsgExpr = Expr(
Caption( {100, 200}, errnum_desc, Font( "Arial Black" ), Font Size( 16 ), Text Color( "Red" ), Back Color( "yellow" ), Spoken( 1 ) );
);
getdataExpr = Expr(
For( i = 1, i <= Nitems( lines ), i++,
If( Contains( Trim(lines[i] ), "TABLE" ),
Table_Name = Trim(Word( 1, lines[i+2] ));
if( Table_Name != "\!"\!"", dt << Set Name( char(Table_Name ) ));
//Check for Excel file; often invert vectors (columns) and tuples (rows)
If( Parse( Table_Name ) == "EXCEL",
excelflag = 1
)
,
Contains( Trim( lines[i] ), "VECTORS" ),
If( excelflag == 1,
tot_rows = Num( Word( 2, Trim( lines[i+1] ), "," ));
dt << Add Rows( tot_rows )
,
cols = Num( Word( 2, Trim( lines[i+1] ), "," ));
If( labelflag == 0,
For( c=1, c <= cols, c++,
dt << New Column << Set Data Type( "Character" );
labelflag = 1
)
)
)
,
Contains( Trim( lines[i] ), "TUPLES" ),
If( excelflag == 1,
cols = Num( Word( 2, Trim( lines[i+1] ), "," ));
If(labelflag == 0,
For(c=2, c <= cols, c++,
dt << New Column << Set Data Type( "Character" );
labelflag = 1
)
)
,
tot_rows = Num( Word( 2, Trim( lines[i+1] ), "," ));
dt << Add Rows(tot_rows)
)
,
// Check for Label (optional) header
Contains( Trim( lines[i]), "LABEL" ),
If(labelflag == 0,
col_name = Word( 1, Trim( lines[i+2] ), "\!"" );
dt << New Column( col_name ) << Set Data Type( "Character" );
labelflag = 1
)
,
// Check for Size (optional) header
Contains( Trim( lines[i] ), "SIZE" ),
size_txt = Word( 1, Trim( lines[i+2] ), "\!"" );
sizeflag = 1
,
// Check for True Length (optional) header
Contains( Trim( lines[i] ), "TRUELENGTH" ),
trulngth_txt = Word( 1, Trim( lines[i+2] ), "\!"" );
truelengthflag = 1
,
// Check for Comment (optional) header
Contains( Trim( lines[i] ), "COMMENT" ),
comment_txt = Trim( Word( 1, lines[i+2], "\!"" ));
commentflag = 1
,
// Check for Units (optional) header
Contains( Trim( lines[i] ), "UNITS" ),
units_txt = Word( 1, Trim( lines[i+2] ), "\!"" );
unitsflag = 1
,
// Check for Display Units (optional) header
Contains( Trim( lines[i] ), "DISPLAYUNITS" ),
displayunits_txt = Word( 1, Trim( lines[i+2] ), "\!"" );
dispalyUnitsflag = 1
,
Contains( Trim( lines[i] ), "DATA" ),
cnt = 0;
rn = 0;
numrows = NItems( lines );
For( cl = i+3, cl <= numrows, cl++,
cnt = cl;
If(
Num( Word( 1, Trim( lines[cl] ), "," )) == -1 & Word( 1, Trim( lines[cl+1] )) == "EOD",
Break()
,
Num( Trim( Word( 1, lines[cl], "," ))) == -1 & Word( 1, Trim( lines[cl+1])) == "BOT",
rn = rn + 1;
For(c = 1, c <= cols, c++,
cnt= cnt+2;
chk_val = Num( Word( 1, Trim( lines[cnt] ), "," ));
If( excelflag == 1,
num_val = Word( 1, Word( 2, Trim( lines[cnt] ), "," ),"\!"\!"")
,
num_val = Num( Word( 2, Trim( lines[cnt] ), "," ));
);
If( excelflag == 1,
string_val = Word( 1, Trim( lines[cnt+1]),"\!"\!"")
,
string_val = Word( 1, Trim( lines[cnt+1] ), "\!"" )
);
if(rn == 1 & excelflag != 1,
column(dt, c)<< Set Name( string_val);
delflag = 1;
);
If( chk_val == 0 & excelflag != 1,
column(dt, c) << Set Data Type("Numeric") << Format( "Best", "Use Thousands Separator") << Set Modeling Type("Continuous");
column(dt, c)[rn] = num_val;
,
chk_val == 1 & excelflag != 1,
column(dt, c) << Set Data Type("Character") << Set Modeling Type("Nominal");
column(dt, c)[rn] = string_val;
,
chk_val == 0 & excelflag == 1,
column(dt, c) << Set Data Type("Character") << Set Modeling Type("Nominal");
column(dt, c)[rn] = num_val;
,
chk_val == 1 & excelflag == 1,
column(dt, c) << Set Data Type("Character") << Set Modeling Type("Nominal");
column(dt, c)[rn] = string_val;
,
Break()
);
);
);
);
);
);
);
//Remove first row (column names) from data table
CleanRowsExpr = Expr(
If(delflag == 1,
dt << Select Rows([1]);
dt << Delete Rows
);
);
main_menuExpr = Expr(
thefile = Pick File( "Select Data Interchange File (DIF)", "$Desktop", {"DIF Files|dif", "All Files|*"}, 1, 0, "newJmpFile.jmp" );
// txtfile = Load Text File( thefile, Charset("ascii"));
txtfile = Load Text File( thefile);
lines = words( txtfile, "\!N\!r" );
commentflag = sizeflag = truelengthflag = unitsflag = dispalyUnitsflag = labelflag = delflag = excelflag = 0;
Eval( errorhdrchkExpr );
If( errhdrnum == 1,
dt = New Table();
Eval( getdataExpr );
Eval( CleanRowsExpr )
,
Eval( errmsgExpr )
);
);
// Main Menu
Clear Log();
Eval( main_menuExpr );
A basic text importer for data interchange format (DIF) files add-in version of the script is also available.
cheers,
Stan