Continuing with my earlier idea of separating the text file into sections and then parsing each section (this recording partially covers the topic: Scripters Club 2024: Session 2 - Preparing Unstructured Incoming Data for Analysis).
Names Default To Here(1);
// Test sections appear to always start with a line beginning with "SetupTitle"
// and to end where the next such line begins, UNLESS it is the last section,
// in which case it ends with a line of ",,,,,,,,,,,,,,,,,,,,,,,".
// Prefix that marks the first line of a test section.
TEST_SECTION_START = "SetupTitle, ";
// A line consisting only of commas; such lines are filtered out before parsing.
EMPTY_LINE = ",,,,,,,,,,,,,,,,,,,,,,,";
// Section patterns for regex
// PROJECT_PATTERN has six capture groups; parse_test_section reads them, in
// order, as project name, lot id, wafer id, time, notes and the reticle pair
// (two space-separated integers following ": ").
PROJECT_PATTERN = "^(.+)_(\d+\.\d*)(W\d{1,2})_(T\d+)_(.+): (\d+ \d+)\,+?";
// SUBSITE_PATTERN captures the digits inside a <Subsite>...</Subsite> tag.
SUBSITE_PATTERN = "<Subsite>(\d+)</Subsite>";
// DATASTART_PATTERN matches a line that begins with "DataName"; parse_test_section
// treats that line as the first line (header) of the tabular data.
DATASTART_PATTERN = "^DataName";
/*
	find_test_section_lines(lines)

	Locates the first test section in a list of text lines.

	Arguments:
		lines - list of strings, one per line of the input file.

	Returns:
		A two-item list {start_idx, end_idx}:
		  start_idx - 1-based index of the first line that starts with
		              TEST_SECTION_START, or 0 when there is no such line.
		  end_idx   - EXCLUSIVE end of that section: the index of the next line
		              that starts with TEST_SECTION_START, or N Items(lines) + 1
		              when the section runs to the end of the list.
		{0, 0} is returned when no section start is found.

	Note: end_idx is exclusive in both cases so that a caller computing the
	section length from the two indices also gets the final line of the last
	section (previously the last section reported N Items(lines), i.e. an
	inclusive end, and its final line was left behind).
*/
find_test_section_lines = function({lines}, {Default Local},
	start_idx = 0;
	end_idx = 0;
	// Copy the script-level constant into a local; flagged by the original
	// author as a workaround for JMP "feature" 00075244.
	TEST_SECTION_START = TEST_SECTION_START;

	For Each({line, idx}, lines,
		If(Starts With(line, TEST_SECTION_START),
			If(start_idx == 0,
				// first header seen: this is where the section begins
				start_idx = idx;
			,
				// second header seen: the section ends just before it
				end_idx = idx;
				break();
			);
		);
	);

	// A start without a following header means the section extends to the
	// end of the list; report one past the last line (exclusive end).
	If(start_idx != 0 & end_idx == 0,
		end_idx = N Items(lines) + 1;
	);

	return(Eval List({start_idx, end_idx}));
);
/*
	parse_test_section(lines)

	Turns the lines of one test section into a data table.

	The header lines are scanned for:
	  - a line matching PROJECT_PATTERN (project, lot, wafer id, time, notes
	    and a pair of reticle values),
	  - a line matching SUBSITE_PATTERN (subsite number),
	  - the first line matching DATASTART_PATTERN, which is taken as the
	    header row of the tabular data; scanning stops there.
	Everything from the data-start line to the end of the section is opened
	as text into an invisible table, trailing empty columns are dropped, the
	metadata is added as character columns moved to the front, and the
	"DataName" column is deleted.

	Arguments:
		lines - list of strings belonging to a single test section.

	Returns:
		The (invisible) data table built from the section.

	Throws:
		If no line matching DATASTART_PATTERN exists in the section.

	Metadata that is not found in the header is left as an empty string
	instead of referencing an unset local.
*/
parse_test_section = function({lines}, {Default Local},
	// Copy the script-level patterns into locals; flagged by the original
	// author as a workaround for JMP "feature" 00075244.
	PROJECT_PATTERN = PROJECT_PATTERN;
	SUBSITE_PATTERN = SUBSITE_PATTERN;
	DATASTART_PATTERN = DATASTART_PATTERN;

	data_start = 0;

	// Defaults so that a section lacking a project or subsite line still
	// yields well-defined (blank) metadata columns.
	projname = "";
	lotid = "";
	waferid = "";
	time = "";
	notes = "";
	reticles = {"", ""};
	subsite = "";

	For Each({line, idx}, lines,
		If(!IsMissing(Regex(line, PROJECT_PATTERN)),
			// matches[1] is the whole match; capture groups start at [2]
			matches = Regex Match(line, PROJECT_PATTERN);
			projname = matches[2];
			lotid = matches[3];
			waferid = matches[4];
			time = matches[5];
			notes = matches[6];
			reticles = Words(Trim Whitespace(matches[7]), " ");
		, !IsMissing(Regex(line, SUBSITE_PATTERN)),
			subsite = Regex(line, SUBSITE_PATTERN, "\1");
		, !IsMissing(Regex(line, DATASTART_PATTERN)),
			data_start = idx;
			break(); // everything after this line is tabular data
		);
	);

	// Without a data header there is nothing to open as a table.
	If(data_start == 0,
		Throw("parse_test_section: no line matching DATASTART_PATTERN found in section");
	);

	data_str = Concat Items(lines[data_start::N Items(lines)], "\!N");
	dt = Open(Char To Blob(data_str), "text", invisible);

	// Walk the columns from the right and drop those with no values,
	// stopping at the first column that has data.
	For Each({colname}, Reverse(dt << Get Column Names("String")), // drop empty columns
		If(Col Number(Column(dt, colname)) == 0,
			dt << Delete Column(colname);
		,
			break(); // break on first "ok" column
		);
	);

	// define order and names here
	dt << New Column("project", Character, Nominal, Set Each Value(projname));
	dt << New Column("lot", Character, Nominal, Set Each Value(lotid));
	dt << New Column("waferid", Character, Nominal, Set Each Value(waferid));
	dt << New Column("time", Character, Nominal, Set Each Value(time));
	dt << New Column("notes", Character, Nominal, Set Each Value(notes));
	dt << New Column("x", Character, Nominal, Set Each Value(reticles[1]));
	dt << New Column("y", Character, Nominal, Set Each Value(reticles[2]));
	dt << New Column("subsite", Character, Nominal, Set Each Value(subsite));

	dt << Move Selected Columns({:project, :lot, :waferid, :time, :notes, :x, :y, :subsite}, To First);
	dt << Delete Columns("DataName");

	return(dt);
);
// Start parsing
// Read the raw file, split it into lines and discard the comma-only lines.
filepath = "$DOWNLOADS/Raw data examples_3DP.csv";
txt = Load Text File(filepath);
lines = Words(txt, "\!N");
lines = Filter Each({line}, lines, line != EMPTY_LINE); // drop empty lines

// Repeatedly cut the first remaining test section out of `lines`, parse it,
// and append the result to dt_result until no section start is found.
{start, end} = find_test_section_lines(lines);
dt_result = Empty();
While(All(start, end),
	// Remove From's third argument is an item COUNT, not an end index, so the
	// section length is end - start. Always remove at least one line so the
	// loop is guaranteed to make progress.
	n_section_lines = Max(1, end - start);
	cur_testset = Remove From(lines, start, n_section_lines);
	dt = parse_test_section(cur_testset);
	If(Is Empty(dt_result),
		// first section: its table becomes the result table
		dt_result = dt;
	,
		dt_result << Concatenate(
			dt,
			"Append to first table"
		);
		Close(dt, no save);
	);
	{start, end} = find_test_section_lines(lines);
);

// The per-section tables are opened invisible; reveal the combined result,
// or report when the file contained no test section at all.
If(Is Empty(dt_result),
	Write("\!NNo test sections found in " || filepath);
,
	dt_result << Show Window(1);
);
-Jarmo