Create dynamic level nested dict from a list of objects?

Question

I am trying to turn a list of objects into a nested dict which could be accessed by indexes.

The following code works for a two-level nested dictionary. I would like to extend it to work flexibly for any number of levels.

from collections import namedtuple
import pprint 

# Record type for one position; the typename is 'holding', which is what
# shows up in the repr of each instance.
Holding = namedtuple('holding', ['portfolio', 'ticker', 'shares'])

# Sample positions used to exercise indexer().
lst = [
    Holding(portfolio='Large Cap', ticker='TSLA', shares=100),
    Holding(portfolio='Large Cap', ticker='MSFT', shares=200),
    Holding(portfolio='Small Cap', ticker='UTSI', shares=500),
]

def indexer(lst, indexes):
    """Group items into a two-level nested dict.

    Only indexes[0] and indexes[1] are used: the first names the attribute
    for the outer keys, the second the attribute for the inner keys. Each
    inner value is the list of items sharing that (outer, inner) key pair.
    """
    grouped = {}
    for entry in lst:
        outer_key = getattr(entry, indexes[0])
        inner_key = getattr(entry, indexes[1])
        # setdefault returns the existing bucket or installs a fresh one.
        bucket = grouped.setdefault(outer_key, {}).setdefault(inner_key, [])
        bucket.append(entry)
    return grouped


# Group the sample holdings by portfolio, then by ticker.
d = indexer(lst, ['portfolio', 'ticker'])
# Pretty-print the nested result so each level is readable.
pp = pprint.PrettyPrinter()
pp.pprint(d)

Outputs:

{'Large Cap': {'MSFT': [holding(portfolio='Large Cap', ticker='MSFT', shares=200)],
               'TSLA': [holding(portfolio='Large Cap', ticker='TSLA', shares=100)]},
 'Small Cap': {'UTSI': [holding(portfolio='Small Cap', ticker='UTSI', shares=500)]}}

martineau · Answer 1 · 2017-12-27T17:14:49.520

One of the best ways I've ever seen to implement nested dictionaries is Aaron Hall's answer to the question "What is the best way to implement nested dictionaries?". It implements a dict subclass that performs what the Perl programming language calls "autovivification".

Anyway, using one here would be useful because it means you only need to call setdefault() for the "leaves" of your tree-like data structure (which are lists, not sub-dictionaries).

So here's an answer to your question that makes use of it:

from collections import namedtuple
from functools import reduce
from operator import attrgetter
from pprint import pprint


# Record type describing a single position.
Holding = namedtuple('Holding', ['portfolio', 'ticker', 'shares'])

# Sample positions, built from plain (portfolio, ticker, shares) rows.
lst = [
    Holding(portfolio, ticker, shares)
    for portfolio, ticker, shares in (
        ('Large Cap', 'TSLA', 100),
        ('Large Cap', 'MSFT', 200),
        ('Small Cap', 'UTSI', 500),
    )
]

def indexer(lst, indexes):
    """ Creates a dynamic nested dictionary based on indexes.

    Args:
        lst: Iterable of objects to group.
        indexes: Attribute names, outermost level first. Dotted names are
            resolved by operator.attrgetter. A single name yields a flat
            {value: [items]} mapping.

    Returns:
        A Vividict (dict subclass) nested one level per index; the innermost
        values are lists of the items that share that key path.

    Raises:
        TypeError: If indexes is empty.
    """

    class Vividict(dict):
        """ dict subclass which dynamically creates sub-dictionaries when
            they're first referenced (and don't exist).
            See https://stackoverflow.com/a/19829714/355230
        """
        def __missing__(self, key):
            value = self[key] = type(self)()
            return value

    # One getter per level. A single attrgetter(*indexes) returns a bare
    # value (not a 1-tuple) when given exactly one name, which would make the
    # star-unpacking below iterate over the attribute value itself.
    getters = [attrgetter(name) for name in indexes]
    if not getters:
        raise TypeError('indexer() needs at least one index attribute')

    result = Vividict()
    for item in lst:
        *indices, leaf = (getter(item) for getter in getters)
        # Walk (and auto-create) the intermediate sub-dictionaries.
        target = reduce(lambda x, y: x[y], indices, result)
        target.setdefault(leaf, []).append(item)  # Leaves are lists, not dicts.

    return result

# Two-level grouping: portfolio -> ticker -> [holdings].
d = indexer(lst, ['portfolio', 'ticker'])
pprint(d)
print()  # blank line between the two printed results
# Three-level grouping: portfolio -> ticker -> shares -> [holdings].
d = indexer(lst, ['portfolio', 'ticker', 'shares'])
pprint(d)

Output:

{'Large Cap': {'MSFT': [Holding(portfolio='Large Cap', ticker='MSFT', shares=200)],
               'TSLA': [Holding(portfolio='Large Cap', ticker='TSLA', shares=100)]},
 'Small Cap': {'UTSI': [Holding(portfolio='Small Cap', ticker='UTSI', shares=500)]}}

{'Large Cap': {'MSFT': {200: [Holding(portfolio='Large Cap', ticker='MSFT', shares=200)]},
               'TSLA': {100: [Holding(portfolio='Large Cap', ticker='TSLA', shares=100)]}},
 'Small Cap': {'UTSI': {500: [Holding(portfolio='Small Cap', ticker='UTSI', shares=500)]}}}

schwobaseggl · Accepted Answer · 2017-12-26T21:53:55.270

You could try something along the following lines. Just iterate over the list of attributes specified by the indexes and keep descending into the nested dict built so far:

def indexer(lst, indexes):
    """Group items into a nested dict with one level per name in indexes.

    Every attribute value except the last selects (or creates) a nested
    dict; the last attribute value keys the list the item is appended to.
    """
    tree = {}
    for obj in lst:
        keys = [getattr(obj, name) for name in indexes]
        leaf_key = keys[-1]
        node = tree  # the dict at the nesting level reached so far
        for branch_key in keys[:-1]:
            # descend one level, creating the sub-dict on first sight
            node = node.setdefault(branch_key, {})
        bucket = node.setdefault(leaf_key, [])
        bucket.append(obj)
    return tree

This produces the following outputs:

>>> d = indexer(lst, ['portfolio', 'ticker'])
{'Large Cap': {'MSFT': [holding(portfolio='Large Cap', ticker='MSFT', shares=200)],
               'TSLA': [holding(portfolio='Large Cap', ticker='TSLA', shares=100)]},
 'Small Cap': {'UTSI': [holding(portfolio='Small Cap', ticker='UTSI', shares=500)]}}

>>> d = indexer(lst, ['portfolio', 'ticker', 'shares'])
{'Large Cap': {'MSFT': {200: [holding(portfolio='Large Cap', ticker='MSFT', shares=200)]},
               'TSLA': {100: [holding(portfolio='Large Cap', ticker='TSLA', shares=100)]}},
 'Small Cap': {'UTSI': {500: [holding(portfolio='Small Cap', ticker='UTSI', shares=500)]}}}

You actually have an error. Your deepest key is having the name of the last index, not of the last value of the tuples. — Glenn D.J., Dec 26 '17 at 21:39
Your last line should probably be `crnt.setdefault(getattr(item, last_index), []).append(item)` — Glenn D.J., Dec 26 '17 at 21:40

score 0 · Answer 3 · answered Dec 26 '17 at 21:33

Your code was actually a good attempt. The small addition I've made is to keep track of the current map (the one the previous index led to) and let the next index create a map inside it. So for each index (and thus each iteration of the inner loop) you go one level deeper. At the last level a list is created instead of a dictionary, and after the loop the item is simply appended to that list.

def indexer(lst, indexes):
    """Group items into a nested dict with one level per name in indexes.

    Each attribute value selects (or creates) the next container down: a
    dict for every level but the last, and a list at the last level, to
    which the item is appended.
    """
    root = {}
    for item in lst:
        node = root
        for depth, name in enumerate(indexes):
            key = getattr(item, name)
            if depth + 1 == len(indexes):
                # deepest level: items are collected in a list
                child = node.setdefault(key, [])
            else:
                child = node.setdefault(key, {})
            node = child
        node.append(item)
    return root

Eric Duminil · Answer 4 · 2017-12-27T17:11:38.150

This question might be more suitable for CodeReview.

Since your code is working, here are a few hints:

namedtuple returns a class. holding should be written Holding.
lst is too generic. It's a list of Holding instances, it could be called holdings.
index0 is not a list index but a dict key.
You could use nested defaultdicts instead of calling setdefault repeatedly.

Here's an example:

from collections import namedtuple, defaultdict
import pprint

# The typename given to namedtuple matches the name the class is bound to,
# so the repr reads Holding(...) and lookups of the class by its own name
# (as pickle does) find it under the same identifier.
Holding = namedtuple('Holding', ['portfolio', 'ticker', 'shares'])

# Sample positions used to exercise indexer().
holdings = [
    Holding('Large Cap', 'TSLA', 100),
    Holding('Large Cap', 'MSFT', 200),
    Holding('Small Cap', 'UTSI', 500),
]


def default_tree(depth, leaf):
    """Return a defaultdict that nests `depth` levels before reaching `leaf`.

    With depth == 1 the missing-key factory is `leaf` itself; for any other
    depth, a missing key creates another default_tree one level shallower.
    """
    if depth == 1:
        factory = leaf
    else:
        def factory():
            return default_tree(depth - 1, leaf)
    return defaultdict(factory)

def indexer(lst, attributes):
    """Group items into nested defaultdicts, one level per attribute name.

    The tree comes from default_tree(len(attributes), list), so looking up
    the item's attribute values in turn ends at a list, to which the item
    is appended.
    """
    tree = default_tree(len(attributes), list)
    for entry in lst:
        bucket = tree
        for name in attributes:
            # missing keys are created on lookup by the defaultdict
            bucket = bucket[getattr(entry, name)]
        bucket.append(entry)
    return tree


# Three-level grouping: portfolio -> ticker -> shares -> [holdings].
d = indexer(holdings, ['portfolio', 'ticker', 'shares'])
# Pretty-print the nested result.
pp = pprint.PrettyPrinter()
pp.pprint(d)

Please read the question again. The original code did not work for a three level dict. schwobaseggl answered how to do that. 1. Point taken. 2. The function is meant to be generic. 3. True, but then the function should be called keys. 4. How? — ChaimG, Dec 26 '17 at 22:21

score 0 · Answer 5 · answered Feb 28 '18 at 18:37

I started using QueryList instead of nested dicts, and it has made my life so much easier.

for example:

ql.filter(ticker='MSFT') will return a list of all MSFT records.

class QueryList(list):
    """Stores a list indexable by attributes.

    A plain list subclass whose helper methods select or group the stored
    items by the values of their attributes.
    """

    def group_by(self, attrs) -> dict:
        """Like a database group_by function.

        args:
            attrs: a str or a list of the group_by attrs.

        Returns:
            {(attr_val0, attr_val1,...): QueryList(),
             ...,
             }
            -- or --
            {attr_val: QueryList(),
            attr_val: QueryList(),
            ...
            }

        The result is a defaultdict(QueryList): indexing it with a missing
        key inserts an empty QueryList, so use .get() for read-only lookups.
        """
        result = defaultdict(QueryList)
        if isinstance(attrs, str):
            for item in self:
                result[getattr(item, attrs)].append(item)
        else:
            # Materialise once so a one-shot iterable of names is not
            # exhausted after the first item.
            attrs = tuple(attrs)
            for item in self:
                result[tuple(getattr(item, x) for x in attrs)].append(item)

        return result

    def filter(self, **kwargs):
        """Returns the subset of QueryList that has matching attributes.

        args:
            kwargs: Attribute name/value pairs.

        Returns:
            A new QueryList, in the original order, of the items for which
            every named attribute compares equal (==) to the given value.
            With no kwargs, every item matches.

        For example:
            foo.filter(portfolio='123', account='ABC') will return all matching items.
        """
        # A single equality scan serves any number of criteria; it does not
        # need the attribute values to be hashable.
        criteria = tuple(kwargs.items())
        return QueryList(
            item for item in self
            if all(getattr(item, attr) == val for attr, val in criteria)
        )

    def scalar(self, default=None, attr=None):
        """Returns the first item in this QueryList.

        args:
            default: The value to return if there is less than one item,
                or if the attr is not found.
            attr: Returns getattr(item, attr) if not None.
        """
        item = self[0] if self else default

        if attr is None:
            return item
        return getattr(item, attr, default)

Create dynamic level nested dict from a list of objects?

5 Answers