Skip to content

Utils

infer_features(example)

Infer Hugging Face Datasets Features from an example.

Source code in mbodied/data/utils.py
64
65
66
def infer_features(example) -> Features:
    """Build a Hugging Face Datasets ``Features`` object from a single example.

    The example is first turned into a feature specification by
    ``to_features`` and that specification is then wrapped in ``Features``.
    """
    inferred_spec = to_features(example)
    return Features(inferred_spec)

to_features(indict, image_keys=None, exclude_keys=None, prefix='')

Convert a dictionary to a Datasets Features object.

Parameters:

Name Type Description Default
indict dict

The dictionary to convert.

required
image_keys dict

A dictionary of keys that should be treated as images.

None
exclude_keys set

A set of full-path-keys to exclude.

None
prefix str

A prefix to add to the keys.

''
Source code in mbodied/data/utils.py
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
def to_features(indict, image_keys=None, exclude_keys=None, prefix="") -> Features:
    """Convert a dictionary to a Datasets Features object.

    Args:
        indict (dict): The dictionary to convert.
        image_keys (dict): A dictionary of keys that should be treated as images.
        exclude_keys (set): A set of full-path-keys to exclude.
        prefix (str): A prefix to add to the keys.
    """
    if exclude_keys is None:
        exclude_keys = set()

    if image_keys is None:
        image_keys = {}

    if isinstance(indict, str):
        return Value("string")
    if isinstance(indict, int):
        return Value("int32")
    if isinstance(indict, float):
        return Value("float32")
    if isinstance(indict, np.int32):
        return Value("int32")
    if isinstance(indict, np.float32):
        return Value("float32")

    if isinstance(indict, list | tuple | np.ndarray):
        if len(indict) == 0:
            raise ValueError("Cannot infer schema from empty list")
        return [to_features(indict[0])]

    if isinstance(indict, dict):
        out_dict = {}
        for key, value in indict.items():
            full_key = f"{prefix}.{key}" if prefix else key
            if full_key in image_keys and full_key not in exclude_keys:
                out_dict[key] = Image(decode=True)
            elif full_key not in exclude_keys:
                out_dict[key] = to_features(value, image_keys, exclude_keys, full_key)
        return out_dict

    raise ValueError(f"Cannot infer schema from {indict}")