cancel
Showing results for 
Show  only  | Search instead for 
Did you mean: 
Check out the JMP® Marketplace featured Capability Explorer add-in
Choose Language Hide Translation Bar
danielrbiber
Level III

Joining Multiple Tables by Row

Greetings,

I’ve been beating my head for long enough with code that should be quite simple.

 

I would like to take tables opened from multiple excel file and sheets and join them in a wide table with columns that are named to correspond to their excel sheet.

In my case, columns should be as shown below.  

 

Col 1 of TestWB1Sheet1 Col 2 of TestWB1Sheet1 Col 3 of TestWB1Sheet1 Col 1 of TestWB1Sheet2 ... Col 3 of TestWB2Sheet4

The one catch is that I don't want to upload data from the last sheet in each excel file (sheet5 in my example).

 

I’ve tried using the code from this discussion but can’t seem to get the naming correct. https://community.jmp.com/t5/Discussions/Joining-Multiple-Data-Table-using-a-loop/m-p/9451

I believe the answer may be in this discussion but it is hard for me to digest the solution because of the extraneous code.

https://community.jmp.com/t5/Discussions/Joining-multiple-DataTables-and-retaining-column-name/m-p/1...

 

Here is my simplified code that doesn’t name columns correctly:

 

Names Default to Here(1);

//Close All is a convienience script to use while writing the script
//Close All(Data Tables, NoSave);

//Create list of files to open
file = Pick File("Pick file/files", "$Desktop", {"All Files|*"}, 1,0, "", multiple);

//create blank list of data tables
dtList = {};

//Open list of files to dtList 
for (i=1, i<=nitems(file), i++,
	
	ws = Get Excel Worksheets(file[i]);
	sws = ws[1::(N Items(ws) - 1)];
	
	show(i);
	
	for (s=1, s<=nitems(sws), s++,
		
		show(s);
	
		dt = Open(file[i], 
		Worksheets( sws[s] ), 
		use for all sheets( 1 ),
		Concatenate Worksheets( 0 ),
		Create Concatenation Column( 0 ),
		Worksheet Settings(
			1,
			Has Column Headers( 1 ),
			Number of Rows in Headers( 1 ),
			Headers Start on Row( 1 ),
			Data Starts on Row( 2 ),
			Data Starts on Column( 1 ),
			Data Ends on Row( 0 ),
			Data Ends on Column( 0 ),
			Replicated Spanned Rows( 1 ),
			Replicated Spanned Headers( 0 ),
			Suppress Hidden Rows( 1 ),
			Suppress Hidden Columns( 1 ),
			Suppress Empty Columns( 1 ),
			Treat as Hierarchy( 0 ),
			Multiple Series Stack( 0 ),
			Import Cell Colors( 0 ),
			Limit Column Detect( 0 ),
			Column Separator String( "-" )
			)
		);
		
	dt << Set Name(char(i)||char(s));
		
	insert into (dtList, dt);
	)
);



//Designate dt_old to hold the joined data
dt_old = data table(dtList[1]);

//Join all tables in dtList to dt_old by row number
for ( i = 2, i <= N Items( dtList ), i++,
	
	dt_new = data table(dtList[i]);
	
	join_name = "Limits" || char(i);
	
	show(i);
	show(dt_new);
	
	dt_old << Join( With (dt_new),
		By Row Number,
		Output Table( join_name )
	);
	
	show(dt_old);
	
	close(dt_old, nosave);
	
	dt_old = data table(join_name);
	
);

Thanks in advance for the help. 

 

1 ACCEPTED SOLUTION

Accepted Solutions
txnelson
Super User

Re: Joining Multiple Tables by Row

Here is my modification of your code to create all of the new columns, and to join them together

I just had to add a simple piece of JSL to rename the columns.  All of the rest of your code worked fine.

Names Default To Here( 1 );

//Close All is a convienience script to use while writing the script
//Close All(Data Tables, NoSave);

//Create list of files to open
file = Pick File( "Pick file/files", "$Desktop", {"All Files|*"}, 1, 0, "", multiple );

//create blank list of data tables
dtList = {};

//Open list of files to dtList 
For( i = 1, i <= N Items( file ), i++, 
	
	ws = Get Excel Worksheets( file[i] );
	sws = ws[1 :: (N Items( ws ) - 1)];
	
	Show( i );
	
	For( s = 1, s <= N Items( sws ), s++, 
		
		Show( s );
	
		dt = Open(
			file[i],
			Worksheets( sws[s] ),
			use for all sheets( 1 ),
			Concatenate Worksheets( 0 ),
			Create Concatenation Column( 0 ),
			Worksheet Settings(
				1,
				Has Column Headers( 1 ),
				Number of Rows in Headers( 1 ),
				Headers Start on Row( 1 ),
				Data Starts on Row( 2 ),
				Data Starts on Column( 1 ),
				Data Ends on Row( 0 ),
				Data Ends on Column( 0 ),
				Replicated Spanned Rows( 1 ),
				Replicated Spanned Headers( 0 ),
				Suppress Hidden Rows( 1 ),
				Suppress Hidden Columns( 1 ),
				Suppress Empty Columns( 1 ),
				Treat as Hierarchy( 0 ),
				Multiple Series Stack( 0 ),
				Import Cell Colors( 0 ),
				Limit Column Detect( 0 ),
				Column Separator String( "-" )
			)
		);
		
		dt << Set Name( Char( i ) || Char( s ) );
		
		Insert Into( dtList, dt );
	
	// set new column names
		currFileName = Word( -2, File[i], ".\/" );
		For( k = 1, k <= N Cols( dt ), k++,
			Column( dt, k ) << set name(
				Column( dt, k ) << get name || " of " || currFileName || sws[s]
			)
		);
	);
);

//Designate dt_old to hold the joined data
dt_old = Data Table( dtList[1] );

//Join all tables in dtList to dt_old by row number
For( i = 2, i <= N Items( dtList ), i++, 
	
	dt_new = Data Table( dtList[i] );
	
	join_name = "Limits" || Char( i );
	
	Show( i );
	Show( dt_new );
	
	dt_old << Join( With( dt_new ), By Row Number, Output Table( join_name ) );
	
	Show( dt_old );
	
	Close( dt_old, nosave );
	
	dt_old = Data Table( join_name );
	
);
Jim

View solution in original post

2 REPLIES 2
jthi
Super User

Re: Joining Multiple Tables by Row

 

Names Default To Here(1);

//directory with files
xlsx_files = Files In Directory("./WB");

//dt collection list
dt_list = {};

For Each({file}, xlsx_files,
	//open datatables one by one
	dt = Open(
		"WB/" || file,
		Worksheets({"Sheet1", "Sheet2", "Sheet3", "Sheet4"}),
		Use for all sheets(1),
		Concatenate Worksheets(1),
		Create Concatenation Column(1),
		Worksheet Settings(
			1,
			Has Column Headers(1),
			Number of Rows in Headers(1),
			Headers Start on Row(1),
			Data Starts on Row(2),
			Data Starts on Column(1),
			Data Ends on Row(0),
			Data Ends on Column(0),
			Replicated Spanned Rows(1),
			Replicated Spanned Headers(0),
			Suppress Hidden Rows(1),
			Suppress Hidden Columns(1),
			Suppress Empty Columns(1),
			Treat as Hierarchy(0),
			Multiple Series Stack(0),
			Import Cell Colors(0),
			Limit Column Detect(0),
			Column Separator String("-")
		)
	);
	//add table name to SourceTable column
	Column(dt, "Source Table") << Set Each Value(Word(1, file, ".") || :"Source Table");
	//add to dtlist
	Insert Into(dt_list, dt);
);

//concatenate
For(i = 2, i <= N Items(dt_list), i++,
	dt_list[1] << Concatenate(dt_list[i], "Append to first table");
	Close(dt_list[i], no save);
);

//split
dt = dt_list[1] << Split(
	Split By(:Source Table),
	Split(:Col1, :Col2, :Col3),
	Sort by Column Property
);
//Close(dt_list[1], no save);
  1. Get all files in directory (should remove unnecessary files if there are any)
  2. Loop over the xlsx files while inserting them to list
  3. Concatenate all files to one jmp datatable
  4. Split the datatable

Tables before concatenate:

jthi_0-1637604352728.png

After concatenate:

jthi_1-1637604381613.png

After split:

jthi_2-1637604404513.png

 

 

 

-Jarmo
txnelson
Super User

Re: Joining Multiple Tables by Row

Here is my modification of your code to create all of the new columns, and to join them together

I just had to add a simple piece of JSL to rename the columns.  All of the rest of your code worked fine.

Names Default To Here( 1 );

//Close All is a convienience script to use while writing the script
//Close All(Data Tables, NoSave);

//Create list of files to open
file = Pick File( "Pick file/files", "$Desktop", {"All Files|*"}, 1, 0, "", multiple );

//create blank list of data tables
dtList = {};

//Open list of files to dtList 
For( i = 1, i <= N Items( file ), i++, 
	
	ws = Get Excel Worksheets( file[i] );
	sws = ws[1 :: (N Items( ws ) - 1)];
	
	Show( i );
	
	For( s = 1, s <= N Items( sws ), s++, 
		
		Show( s );
	
		dt = Open(
			file[i],
			Worksheets( sws[s] ),
			use for all sheets( 1 ),
			Concatenate Worksheets( 0 ),
			Create Concatenation Column( 0 ),
			Worksheet Settings(
				1,
				Has Column Headers( 1 ),
				Number of Rows in Headers( 1 ),
				Headers Start on Row( 1 ),
				Data Starts on Row( 2 ),
				Data Starts on Column( 1 ),
				Data Ends on Row( 0 ),
				Data Ends on Column( 0 ),
				Replicated Spanned Rows( 1 ),
				Replicated Spanned Headers( 0 ),
				Suppress Hidden Rows( 1 ),
				Suppress Hidden Columns( 1 ),
				Suppress Empty Columns( 1 ),
				Treat as Hierarchy( 0 ),
				Multiple Series Stack( 0 ),
				Import Cell Colors( 0 ),
				Limit Column Detect( 0 ),
				Column Separator String( "-" )
			)
		);
		
		dt << Set Name( Char( i ) || Char( s ) );
		
		Insert Into( dtList, dt );
	
	// set new column names
		currFileName = Word( -2, File[i], ".\/" );
		For( k = 1, k <= N Cols( dt ), k++,
			Column( dt, k ) << set name(
				Column( dt, k ) << get name || " of " || currFileName || sws[s]
			)
		);
	);
);

//Designate dt_old to hold the joined data
dt_old = Data Table( dtList[1] );

//Join all tables in dtList to dt_old by row number
For( i = 2, i <= N Items( dtList ), i++, 
	
	dt_new = Data Table( dtList[i] );
	
	join_name = "Limits" || Char( i );
	
	Show( i );
	Show( dt_new );
	
	dt_old << Join( With( dt_new ), By Row Number, Output Table( join_name ) );
	
	Show( dt_old );
	
	Close( dt_old, nosave );
	
	dt_old = Data Table( join_name );
	
);
Jim