Warning: I am new to coding!
I have about 500 PDF files containing tabulated data that I want to import. I managed to create a script that extracts, stacks, and transposes the data, and appends (concatenates) it to a table that I called "final.jmp". Each PDF is represented by one row in final.jmp. Here are the workflow and the issues I identified:
My question:
Is it possible to run the script in a repetitive fashion, each time selecting a new pdf until all pdfs from a folder have been imported and collated into a final table?
Names Default To Here( 1 );

// Import the selected regions of page 1 of the PDF into one data table named "1".
// Each "add rows" entry is one rectangle on the page; "column borders" pins the
// positions at which that rectangle is split into columns.
dtImport = Open(
	"C:\example folder\example.pdf",
	PDF Tables(
		Table(
			table name( "1" ),
			add rows( page( 1 ), Rect( 2.0419, 0.9776, 3.1993, 1.1726 ) ),
			add rows( page( 1 ), Rect( 1.2137, 3.7819, 6.3603, 3.9069 ) ),
			add rows( page( 1 ), Rect( 0.8717, 3.9636, 3.0796, 4.0886 ) ),
			add rows(
				page( 1 ),
				Rect( 0.9853, 4.1436, 3.0029, 4.2686 ),
				column borders( 0.9853, 2.5172, 3.0029 )
			),
			add rows( page( 1 ), Rect( 4.581, 4.1436, 7.297, 4.2686 ) ),
			add rows(
				page( 1 ),
				Rect( 0.8883, 4.4136, 4.6374, 4.5386 ),
				column borders( 0.8883, 1.9603, 4.6374 )
			),
			add rows( page( 1 ), Rect( 0.7556, 4.7979, 7.5908, 8.2654 ) )
		)
	)
);

// Stack Column 3 .. Column 6 into a single "Data" column; "Label" records which
// source column each value came from.
// The table references returned by Open/Stack are used instead of Data Table( "1" ) /
// Data Table( "2" ): a lookup by name can pick up a stale table when a table with
// that name is already open (for example from a previous run of this script).
dtStacked = dtImport << Stack(
	columns( :Column 3, :Column 4, :Column 5, :Column 6 ),
	Source Label Column( "Label" ),
	Stacked Data Column( "Data" ),
	Output table( "2" )
);

// Transpose the stacked "Data" column so the values of this PDF end up side by side
// in the output table "Final".
dtFinal = dtStacked << Transpose(
	columns( :Data ),
	Output Table( "Final" )
);
Possible flow:
Maybe something like this could work:
//resolve unqualified variable names in this script's Here namespace rather than the global namespace
Names Default To Here(1);
//directory where .pdf files exist; file names are concatenated directly onto this string below, so it must end with a path separator
pdfDirectory = "C:\example folder\";
//function to open, stack and transpose one pdf file
//  filePath: full path of the pdf file to import
//  returns:  reference to an invisible data table holding the transposed "Data" column;
//            the caller is responsible for closing it
//the intermediate tables (raw import and stacked table) are closed before returning
stackTransposePdfFile = function({filePath}, {Default Local},
	//import the selected page-1 rectangles of the pdf into one data table
	dtTemp = Open(
		filePath,
		PDF Tables(
			Table(
				table name("1"),
				add rows(page(1), Rect(2.0419, 0.9776, 3.1993, 1.1726)),
				add rows(page(1), Rect(1.2137, 3.7819, 6.3603, 3.9069)),
				add rows(page(1), Rect(0.8717, 3.9636, 3.0796, 4.0886)),
				add rows(page(1), Rect(0.9853, 4.1436, 3.0029, 4.2686), column borders(0.9853, 2.5172, 3.0029)),
				add rows(page(1), Rect(4.581, 4.1436, 7.297, 4.2686)),
				add rows(page(1), Rect(0.8883, 4.4136, 4.6374, 4.5386), column borders(0.8883, 1.9603, 4.6374)),
				add rows(page(1), Rect(0.7556, 4.7979, 7.5908, 8.2654))
			)
		)
	);
	//stack data table from pdf: Column 3 .. Column 6 go into "Data", source column names into "Label"
	dtTemp_stacked = dtTemp << Stack(
		columns(:Column 3, :Column 4, :Column 5, :Column 6),
		Source Label Column("Label"),
		Stacked Data Column("Data"),
		invisible
	);
	Close(dtTemp, no save); //close table created directly from pdf
	//transpose table
	dtTemp_transposed = dtTemp_stacked << Transpose(columns(:Data), invisible);
	//the comma matters: without it "dtTemp_stacked no save" is read as a single
	//(undefined) name, because JSL names may contain spaces
	Close(dtTemp_stacked, no save); //close stacked table
	//return transposed table
	return(dtTemp_transposed);
);
//list everything in the directory and keep only the .pdf entries, so that other
//files or subfolders are never handed to the pdf import
dirEntry_list = Files In Directory(pdfDirectory);
pdfFile_list = {};
For(k = 1, k <= N Items(dirEntry_list), k++,
	If(Ends With(Lowercase(dirEntry_list[k]), ".pdf"),
		Insert Into(pdfFile_list, dirEntry_list[k])
	)
);
//stop with a clear message instead of failing on pdfFile_list[1] below
If(N Items(pdfFile_list) == 0,
	Throw("No .pdf files found in " || pdfDirectory)
);
//use the first table as "collection table"
finalDt = stackTransposePdfFile(pdfDirectory || pdfFile_list[1]);
//loop over files in pdfFile_list starting from second file and append each result
For(i = 2, i <= N Items(pdfFile_list), i++,
	pdfDt = stackTransposePdfFile(pdfDirectory || pdfFile_list[i]);
	finalDt << Concatenate(pdfDt, Append to first table);
	Close(pdfDt, no save); //close transposed table
);
//the collection table was created invisible by stackTransposePdfFile; show it now that it is complete
finalDt << Show Window(1);
Possible flow:
Maybe something like this could work:
//resolve unqualified variable names in this script's Here namespace rather than the global namespace
Names Default To Here(1);
//directory where .pdf files exist; file names are concatenated directly onto this string below, so it must end with a path separator
pdfDirectory = "C:\example folder\";
//function to open, stack and transpose one pdf file
//  filePath: full path of the pdf file to import
//  returns:  reference to an invisible data table holding the transposed "Data" column;
//            the caller is responsible for closing it
//the intermediate tables (raw import and stacked table) are closed before returning
stackTransposePdfFile = function({filePath}, {Default Local},
	//import the selected page-1 rectangles of the pdf into one data table
	dtTemp = Open(
		filePath,
		PDF Tables(
			Table(
				table name("1"),
				add rows(page(1), Rect(2.0419, 0.9776, 3.1993, 1.1726)),
				add rows(page(1), Rect(1.2137, 3.7819, 6.3603, 3.9069)),
				add rows(page(1), Rect(0.8717, 3.9636, 3.0796, 4.0886)),
				add rows(page(1), Rect(0.9853, 4.1436, 3.0029, 4.2686), column borders(0.9853, 2.5172, 3.0029)),
				add rows(page(1), Rect(4.581, 4.1436, 7.297, 4.2686)),
				add rows(page(1), Rect(0.8883, 4.4136, 4.6374, 4.5386), column borders(0.8883, 1.9603, 4.6374)),
				add rows(page(1), Rect(0.7556, 4.7979, 7.5908, 8.2654))
			)
		)
	);
	//stack data table from pdf: Column 3 .. Column 6 go into "Data", source column names into "Label"
	dtTemp_stacked = dtTemp << Stack(
		columns(:Column 3, :Column 4, :Column 5, :Column 6),
		Source Label Column("Label"),
		Stacked Data Column("Data"),
		invisible
	);
	Close(dtTemp, no save); //close table created directly from pdf
	//transpose table
	dtTemp_transposed = dtTemp_stacked << Transpose(columns(:Data), invisible);
	//the comma matters: without it "dtTemp_stacked no save" is read as a single
	//(undefined) name, because JSL names may contain spaces
	Close(dtTemp_stacked, no save); //close stacked table
	//return transposed table
	return(dtTemp_transposed);
);
//list everything in the directory and keep only the .pdf entries, so that other
//files or subfolders are never handed to the pdf import
dirEntry_list = Files In Directory(pdfDirectory);
pdfFile_list = {};
For(k = 1, k <= N Items(dirEntry_list), k++,
	If(Ends With(Lowercase(dirEntry_list[k]), ".pdf"),
		Insert Into(pdfFile_list, dirEntry_list[k])
	)
);
//stop with a clear message instead of failing on pdfFile_list[1] below
If(N Items(pdfFile_list) == 0,
	Throw("No .pdf files found in " || pdfDirectory)
);
//use the first table as "collection table"
finalDt = stackTransposePdfFile(pdfDirectory || pdfFile_list[1]);
//loop over files in pdfFile_list starting from second file and append each result
For(i = 2, i <= N Items(pdfFile_list), i++,
	pdfDt = stackTransposePdfFile(pdfDirectory || pdfFile_list[i]);
	finalDt << Concatenate(pdfDt, Append to first table);
	Close(pdfDt, no save); //close transposed table
);
//the collection table was created invisible by stackTransposePdfFile; show it now that it is complete
finalDt << Show Window(1);
Thanks - I don't know where I'd be without this forum. Great stuff!
Thanks once again for all the help we get!