How do you import JSONs that contain double nested arrays?
Created:
Aug 15, 2022 09:58 AM
| Last Modified: Jun 10, 2023 1:52 PM(2245 views)
I'm trying to import a number of JSON files that contain double nested arrays (see example below). The arrays don't follow a consistent format, there could be numerous objects in each array or none at all. I want to have one score per row as shown below.
How can I get the data in this format using the import tool? Also, the name of the prediction is not always spelt the same e.g. sometimes it's "Small Cats" and sometimes it's "smallCats".
You might have to write custom parser for this. Use Load Text File(file_path, JSON); and then parse the resulting Associative Array. It should be kinda simple if the format is consistent.
Here is an example you can try to get you started. It seems to do what you want, on the sample file you provided. In your particular case the custom parsing is relatively straightforward, although this isn't always the case. For deeper, more convoluted structures, parsing with recursion would likely be preferable.
Cheers,
Brady
Names Default To Here(1);
aa = Load Text File(pickfile(), "JSON");
/* since only the "prediction" key of the main array produces varying row values, the following AA can be used
to write those columns with nonvarying values, after all else is done.
*/
constantsAA = remove(aa, "prediction");
dt = new table("Parsed results",
<< new column("Points 1"), <<new column("Points 2"), <<new column("Points 3"), <<new column("Points 4"),
<< new column("Cat Type", character), <<new column("Score"), <<new column("Frame Type", character));
// get a list of the possible cat types.
catList = aa["prediction"] << get keys;
// for each, see if there is any data. If so, write it to the table
for each ({v,i}, catList,
if(nitems(aa["prediction"][v]) > 0, //if the value for this cat type is an empty list, there is no data, so skip the body of the loop
for each({w, k}, aa["prediction"][v], //else, cycle through the list values, writing to the table
dt << add rows(1);
dt[nrow(dt), 1::4] = shape( Matrix(w["data"]["points"] ), 1); // places the values from {{x, y}, {w, z}}, into the 1st 4 table columns
dt:Cat type = v;
dt:Score = w["score"];
dt: Frame Type = w["type"];
);
)
);
//now, cycle through the AA containing "constant" data, adding this to the table
//Note: the char() function was employed to deal with the "flagged" column's unquoted 'true' value.
for each({v, i}, constantsAA << get keys,
dt << new column(v, character, <<set each value(char(constantsAA[v])))
);
/*
There may remain some cleanup work to script:
-- preferential reordering of columns
-- modification of data type for some of the constant-valued columns as required
-- possibly will have to recode "small cats" and "Small Cats" to be the same thing. Ideally this would be addressed before writing
the original JSON file (i.e., at the time of collecting the data, however this was done)
*/
I had very similar version to @brady_brady's solution, but there are some small differences which might give some ideas to think about, so I'm sharing it:
Names Default To Here(1);
file_path = "$DOWNLOADS\Test.json";
aa_all = Load Text File(file_path, JSON);
// separate prediction to it's own AA as it is only we need to loop over, rest are constants
aa_pred = Remove From(aa_all, "prediction");
// from each prediction, we need to collect:
// data.points (multiple rows), score, type
// Note - we don't add points columns yet, because we don't how many we will have.
dt_pred = New Table("preds",
Add Rows(0),
New Column("name", Character, Continuous),
New Column("score", Numeric, Continuous),
New Column("type", Character, Nominal)
);
// calculate max points and create points columns
max_points = 0;
For Each({{pred_key, pred_data}}, aa_pred,
For Each({aa_data}, pred_data,
If(Contains(aa_data, "data"),
If(Contains(aa_data["data"], "points"),
points_list = aa_data["data"]["points"];
m_points = Matrix(points_list);
new_max_points = N Rows(m_points) * N Cols(m_points);
If(new_max_points > max_points,
max_points = new_max_points;
);
);
);
);
);
point_cols = {};
For(i = 1, i <= max_points, i++,
Insert Into(point_cols, dt_pred << New Column("Points", Numeric, Continuous));
);
point_cols[1] << Set Name("Points 1");
For Each({{pred_key, pred_data}}, aa_pred,
If(N Items(pred_data) == 0,
new_row = {};
new_row = Eval List({pred_key, ., ""});
Insert Into(new_row, AsList(J(1, max_points, .)`));
dt_pred << Add Rows(1);
dt_pred[N Rows(dt_pred), 0] = new_row;
,
For Each({aa_data}, pred_data,
new_row = Eval List({pred_key, aa_data["score"], aa_data["type"]});
Insert Into(new_row, As List(Shape(Matrix(aa_data["data"]["points"]), max_points, 1)));
dt_pred << Add Rows(1);
dt_pred[N Rows(dt_pred), 0] = new_row;
);
);
);
For Each({{key, val}}, aa_all,
dt_pred << New Column(key, Character, Nominal, << Set Each Value(char(aa_all[key])));
);
-Jarmo
'
var data = div.getElementsByClassName("video-js");
var script = document.createElement('script');
script.src = "https://players.brightcove.net/" + data_account + "/" + data_palyer + "_default/index.min.js";
for(var i=0;i< data.length;i++){
videodata.push(data[i]);
}
}
}
for(var i=0;i< videodata.length;i++){
document.getElementsByClassName('lia-vid-container')[i].innerHTML = videodata[i].outerHTML;
document.body.appendChild(script);
}
}
catch(e){
}
/* Re compile html */
$compile(rootElement.querySelectorAll('div.lia-message-body-content')[0])($scope);
}
if (code_l.toLowerCase() != newBody.getAttribute("slang").toLowerCase()) {
/* Adding Translation flag */
var tr_obj = $filter('filter')($scope.sourceLangList, function (obj_l) {
return obj_l.code.toLowerCase() === newBody.getAttribute("slang").toLowerCase()
});
if (tr_obj.length > 0) {
tr_text = "This post originally written in lilicon-trans-text has been computer translated for you. When you reply, it will also be translated back to lilicon-trans-text.".replace(/lilicon-trans-text/g, tr_obj[0].title);
try {
if ($scope.wootMessages[$rootScope.profLang] != undefined) {
tr_text = $scope.wootMessages[$rootScope.profLang].replace(/lilicon-trans-text/g, tr_obj[0].title);
}
} catch (e) {
}
} else {
//tr_text = "This message was translated for your convenience!";
tr_text = "This message was translated for your convenience!";
}
try {
if (!document.getElementById("tr-msz-" + value)) {
var tr_para = document.createElement("P");
tr_para.setAttribute("id", "tr-msz-" + value);
tr_para.setAttribute("class", "tr-msz");
tr_para.style.textAlign = 'justify';
var tr_fTag = document.createElement("IMG");
tr_fTag.setAttribute("class", "tFlag");
tr_fTag.setAttribute("src", "/html/assets/lingoTrFlag.PNG");
tr_fTag.style.marginRight = "5px";
tr_fTag.style.height = "14px";
tr_para.appendChild(tr_fTag);
var tr_textNode = document.createTextNode(tr_text);
tr_para.appendChild(tr_textNode);
/* Woot message only for multi source */
if(rootElement.querySelector(".lia-quilt-forum-message")){
rootElement.querySelector(".lia-quilt-forum-message").appendChild(tr_para);
} else if(rootElement.querySelector(".lia-message-view-blog-topic-message")) {
rootElement.querySelector(".lia-message-view-blog-topic-message").appendChild(tr_para);
} else if(rootElement.querySelector(".lia-quilt-blog-reply-message")){
rootElement.querySelector(".lia-quilt-blog-reply-message").appendChild(tr_para);
} else if(rootElement.querySelector(".lia-quilt-tkb-message")){
rootElement.querySelector(".lia-quilt-tkb-message").appendChild(tr_para);
} else if(rootElement.querySelector(".lia-quilt-tkb-reply-message")){
rootElement.querySelector(".lia-quilt-tkb-reply-message").insertBefore(tr_para,rootElement.querySelector(".lia-quilt-row.lia-quilt-row-footer"));
} else if(rootElement.querySelector(".lia-quilt-idea-message")){
rootElement.querySelector(".lia-quilt-idea-message").appendChild(tr_para);
}else if(rootElement.querySelector(".lia-quilt-column-alley-left")){
rootElement.querySelector(".lia-quilt-column-alley-left").appendChild(tr_para);
}
else {
if (rootElement.querySelectorAll('div.lia-quilt-row-footer').length > 0) {
rootElement.querySelectorAll('div.lia-quilt-row-footer')[0].appendChild(tr_para);
} else {
rootElement.querySelectorAll('div.lia-quilt-column-message-footer')[0].appendChild(tr_para);
}
}
}
} catch (e) {
}
}
} else {
/* Do not display button for same language */
// syncList.remove(value);
var index = $scope.syncList.indexOf(value);
if (index > -1) {
$scope.syncList.splice(index, 1);
}
}
}
}
}
}
angular.forEach(mszList_l, function (value) {
if (document.querySelectorAll('div.lia-js-data-messageUid-' + value).length > 0) {
var rootElements = document.querySelectorAll('div.lia-js-data-messageUid-' + value);
}else if(document.querySelectorAll('.lia-occasion-message-view .lia-component-occasion-message-view').length >0){
var rootElements = document.querySelectorAll('.lia-occasion-message-view .lia-component-occasion-message-view')[0].querySelectorAll('.lia-occasion-description')[0];
}else {
var rootElements = document.querySelectorAll('div.message-uid-' + value);
}
angular.forEach(rootElements, function (rootElement) {
if (value == '533809' && "ForumTopicPage" == "TkbArticlePage") {
rootElement = document.querySelector('.lia-thread-topic');
}
/* V1.1 Remove from UI */
if (document.getElementById("tr-msz-" + value)) {
document.getElementById("tr-msz-" + value).remove();
}
if (document.getElementById("tr-sync-" + value)) {
document.getElementById("tr-sync-" + value).remove();
}
/* XPath expression for subject and Body */
var lingoRBExp = "//lingo-body[@id = " + "'lingo-body-" + value + "'" + "]";
lingoRSExp = "//lingo-sub[@id = " + "'lingo-sub-" + value + "'" + "]";
/* Get translated subject of the message */
lingoRSXML = doc.evaluate(lingoRSExp, doc, null, XPathResult.UNORDERED_NODE_SNAPSHOT_TYPE, null);
for (var i = 0; i < lingoRSXML.snapshotLength; i++) {
/* Replace Reply/Comment subject with transalted subject */
var newSub = lingoRSXML.snapshotItem(i);
/*** START : extracting subject from source if selected language and source language is same **/
var sub_L = "";
if (newSub.getAttribute("slang").toLowerCase() == code_l.toLowerCase()) {
if (value == '533809') {
sub_L = decodeURIComponent($scope.sourceContent[value].subject);
}
else{
sub_L = decodeURIComponent($scope.sourceContent[value].subject);
}
} else {
sub_L = newSub.innerHTML;
}
/*** End : extracting subject from source if selected language and source language is same **/
/* This code is placed to remove the extra meta tag adding in the UI*/
try{
sub_L = sub_L.replace('<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />','');
}
catch(e){
}
// if($scope.viewTrContentOnly || (newSub.getAttribute("slang").toLowerCase() != code_l.toLowerCase())) {
if ($scope.viewTrContentOnly) {
if ("ForumTopicPage" == "IdeaPage") {
if (value == '533809') {
if( (sub_L != "") && (sub_L != undefined) && (sub_L != "undefined") ){
document.querySelector('.MessageSubject .lia-message-subject').innerHTML = sub_L;
}
}
}
if ("ForumTopicPage" == "TkbArticlePage") {
if (value == '533809') {
if( (sub_L != "") && (sub_L != undefined) && (sub_L != "undefined") ){
var subTkbElement = document.querySelector('.lia-thread-subject');
if(subTkbElement){
document.querySelector('.lia-thread-subject').innerHTML = sub_L;
}
}
}
}
else if ("ForumTopicPage" == "BlogArticlePage") {
if (value == '533809') {
try {
if((sub_L != "") && (sub_L!= undefined) && (sub_L != "undefined")){
var subElement = rootElement.querySelector('.lia-blog-article-page-article-subject');
if(subElement) {
subElement.innerText = sub_L;
}
}
} catch (e) {
}
/* var subElement = rootElement.querySelectorAll('.lia-blog-article-page-article-subject');
for (var subI = 0; subI < subElement.length; subI++) {
if((sub_L != "") && (sub_L!= undefined) && (sub_L != "undefined")){
subElement[subI].innerHTML = sub_L;
}
} */
}
else {
try {
// rootElement.querySelectorAll('.lia-blog-article-page-article-subject').innerHTML= sub_L;
/** var subElement = rootElement.querySelectorAll('.lia-blog-article-page-article-subject');
for (var j = 0; j < subElement.length; j++) {
if( (sub_L != "") && (sub_L != undefined) && (sub_L != "undefined") ){
subElement[j].innerHTML = sub_L;
}
} **/
} catch (e) {
}
}
}
else {
if (value == '533809') {
try{
/* Start: This code is written by iTalent as part of iTrack LILICON - 98 */
if( (sub_L != "") && (sub_L != undefined) && (sub_L != "undefined") ){
if(document.querySelectorAll('.lia-quilt-forum-topic-page').length > 0){
if(rootElement.querySelector('div.lia-message-subject').querySelector('h5')){
rootElement.querySelector('div.lia-message-subject').querySelector('h5').innerText = decodeURIComponent(sub_L);
} else {
rootElement.querySelector('.MessageSubject .lia-message-subject').innerText = sub_L;
}
} else {
rootElement.querySelector('.MessageSubject .lia-message-subject').innerText = sub_L;
}
}
/* End: This code is written by iTalent as part of iTrack LILICON - 98 */
}
catch(e){
console.log("subject not available for second time. error details: " + e);
}
} else {
try {
/* Start: This code is written by iTalent as part of LILICON - 98 reported by Ian */
if ("ForumTopicPage" == "IdeaPage") {
if( (sub_L != "") && (sub_L != undefined) && (sub_L != "undefined") ){
document.querySelector('.lia-js-data-messageUid-'+ value).querySelector('.MessageSubject .lia-message-subject').innerText = sub_L;
}
}
else{
if( (sub_L != "") && (sub_L != undefined) && (sub_L != "undefined") ){
rootElement.querySelector('.MessageSubject .lia-message-subject').innerText = sub_L;
/* End: This code is written as part of LILICON - 98 reported by Ian */
}
}
} catch (e) {
console.log("Reply subject not available. error details: " + e);
}
}
}
// Label translation
var labelEle = document.querySelector("#labelsForMessage");
if (!labelEle) {
labelEle = document.querySelector(".LabelsList");
}
if (labelEle) {
var listContains = labelEle.querySelector('.label');
if (listContains) {
/* Commenting this code as bussiness want to point search with source language label */
// var tagHLink = labelEle.querySelectorAll(".label")[0].querySelector(".label-link").href.split("label-name")[0];
var lingoLabelExp = "//lingo-label/text()";
trLabels = [];
trLabelsHtml = "";
/* Get translated labels of the message */
lingoLXML = doc.evaluate(lingoLabelExp, doc, null, XPathResult.UNORDERED_NODE_SNAPSHOT_TYPE, null);
/* try{
for(var j=0;j,';
}
trLabelsHtml = trLabelsHtml+'