Finding most similar file in json tree

I have a JSON tree of files that looks like this:
{"file1": { "subFile": { "moreSubFiles": {} }, "subFile2": {} } }
The user will give me a file path that looks like this: file1/subFile/moreSubFiles, and I will give them the name of a random subdirectory of that path. But sometimes the requested path will not exist, or it will have no subdirectories, so in that case I would like to give them a random file from the most similar path instead. I am not the best at coding, so I spent hours on this problem and wrote hundreds of lines of code that didn’t work. Does anyone have a good method of doing this?

This might get you going: you want to split the path into directories and then loop through your object, stepping into one directory at a time until you reach the end point. In this example, null is returned if the path doesn’t match your object structure.

// Sample directory tree: each key is an entry name and each value is an
// object holding that entry's children ({} means no children).
var directories = {
  file1: {
    subFile: { moreSubFiles: {} },
    subFile2: {}
  }
};

/**
 * Resolve a slash-separated path against the `directories` tree.
 *
 * Only keys that exist on the tree's own objects are followed, so segments
 * such as "constructor" or "toString" do not resolve to members inherited
 * from Object.prototype.
 *
 * @param {string} path - Path such as "file1/subFile/moreSubFiles".
 * @returns {Object|null} The node the path ends on, or null if any segment
 *     is missing from the tree.
 */
var find = function(path) {
  var parts = path.split('/');
  var directory = directories;
  for (var i = 0; i < parts.length; i++) {
    var part = parts[i];
    // Own-property check: a plain lookup would also find inherited members.
    if (!Object.prototype.hasOwnProperty.call(directory, part)) {
      return null;
    }
    directory = directory[part];
    if (!directory) {
      return null;
    }
  }
  return directory;
};

// Demo: the first path exists in the tree; the second has no "subFilez"
// entry, so find() reports null for it.
['file1/subFile/moreSubFiles', 'file1/subFilez/noFiles'].forEach(function(path) {
  console.log(find(path));
});
1 Like

I meant that you input the path, and if that path doesn’t exist it gives you the path to the most similar object. Thank you so much anyways :grinning:

Depends how you define “most similar object”, I suppose. If you want to apply some sort of string metric, you might have a look at this Levenshtein npm module for instance… in @markbrown4’s find function, you’d iterate through all properties of the current (sub-)object and proceed with the one with the shortest distance.

1 Like

By the way, having the direct parent directory be similar is more important than having the farther parents be similar. Actually, the way I tried to do it myself was by generating an array of all the file paths in that object and finding the one with the smallest Levenshtein distance, where the Levenshtein distance of each part is multiplied by the number of slashes before it (so parts nearer the end of the path weigh more). But I spent many hours debugging and ended up with half solutions that were hundreds of lines long. Sadly I deleted them, so I can’t show anything.

I just shamelessly forked @markbrown4’s algo (hope that’s ok) and added that Levenshtein functionality… but if you want the direct parent to be more similar you’d probably have to do a full tree search.

var levenshtein = require('levenshtein');

// Sample directory tree: each key is an entry name and each value is an
// object holding that entry's children ({} means no children).
var directories = {
    file1: {
        subFile: { moreSubFiles: {} },
        subFile2: {}
    }
};

/**
 * Pick the child of `directory` whose name is nearest to `part`.
 *
 * Nearness is the `distance` reported by the `levenshtein` module for each
 * (key, part) pair; on a tie the key enumerated first wins.
 *
 * @param {Object} directory - Node whose own keys are the candidate names.
 * @param {string} part - Requested (possibly misspelled) name.
 * @returns {*} The value stored under the nearest key, or null when
 *     `directory` is null/undefined or has no own enumerable keys.
 */
var closest = function(directory, part) {
    // Object.keys throws on null/undefined; report "no match" instead.
    if (directory == null) return null;

    var shortest = Infinity,
        match = null,
        // Own enumerable keys only: for...in would also visit inherited
        // ones, letting a property added to a prototype win the match.
        keys = Object.keys(directory),
        distance;

    for (var i = 0; i < keys.length; i++) {
        distance = new levenshtein(keys[i], part).distance;

        // Strict "<" keeps the first key on a tie.
        if (distance < shortest) {
            shortest = distance;
            match = directory[keys[i]];
        }
    }

    return match;
};

/**
 * Resolve a slash-separated path against the `directories` tree, falling
 * back to the nearest-named sibling (via `closest`) whenever a segment has
 * no usable entry of its own.
 *
 * @param {string} path - Path such as "file1/subFilf".
 * @returns {*} The node reached after the last segment, or null if some
 *     level offers neither an exact entry nor a closest match.
 */
var find = function(path) {
    var parts = path.split('/'),
        directory = directories,
        part,
        exact;

    // Indexed loop: for...in over an array would also visit enumerable
    // properties added to Array.prototype and treat them as path segments.
    for (var i = 0; i < parts.length; i++) {
        part = parts[i];
        // Only own keys count as exact hits, so segments like "constructor"
        // are not resolved through the prototype chain.
        exact = Object.prototype.hasOwnProperty.call(directory, part)
            ? directory[part]
            : undefined;
        directory = exact || closest(directory, part);

        if (!directory) return null;
    }

    return directory;
};

// "subFilf" is not a key of file1, so find() falls back to closest(); by
// Levenshtein distance the nearest key is "subFile", so this logs that
// node: { moreSubFiles: {} }.
console.log(find('file1/subFilf'));

2 Likes

Thank you!

This topic was automatically closed 91 days after the last reply. New replies are no longer allowed.