
CLI

auto(ctx, task, image_path, model_src, api_key, params)

Dynamically select and run the correct agent based on the task.

Example command

mbodied auto language --params "{\"instruction\": \"Tell me a math joke?\"}"

Response

Why was the equal sign so humble? Because it knew it wasn't less than or greater than anyone else!

Example command

mbodied auto motion-openvla --params "{\"instruction\": \"Move forward\", \"image\": \"resources/bridge_example.jpeg\"}" --model-src "https://api.mbodi.ai/community-models/"

Response

Response: HandControl(pose={'x': -0.00960310545, 'y': -0.0111081966, 'z': -0.00206002074, 'roll': 0.0126330038, 'pitch': -0.000780597846, 'yaw': -0.0177964902}, grasp={'value': 0.996078431})

Inputs

[task]: Task to be executed by the agent. Choices include:

- language: Run language-related tasks.
- motion-openvla: Use the OpenVlaAgent to generate robot motion.
- sense-object-detection: Run object detection tasks.
- sense-image-segmentation: Run image segmentation tasks.
- sense-depth-estimation: Run depth estimation tasks.

[image-path]: (Optional) Path to an image file, required for sense and motion tasks.
[model-src]: The source of the model, e.g., "openai", "gradio", etc.
[api-key]: (Optional) API key for accessing the remote model.
[params]: The parameters for the agent.

Outputs

[Response]: The output generated by the selected agent based on the task, such as HandControl for motion or detected objects for sensing tasks.
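
The same dispatch can also be done from Python rather than the shell. The sketch below is illustrative only and assumes the dotted paths passed to smart_import in the source (mbodied.agents.auto.AutoAgent, mbodied.types.sense.Image) resolve as ordinary imports:

# Illustrative sketch of what `mbodied auto motion-openvla ...` does.
from mbodied.agents.auto import AutoAgent
from mbodied.types.sense import Image

# The CLI builds these kwargs from --params and --image-path.
options = {
    "instruction": "Move forward",
    "image": Image("resources/bridge_example.jpeg"),
}

agent = AutoAgent(
    task="motion-openvla",
    model_src="https://api.mbodi.ai/community-models/",
    model_kwargs={},  # e.g. {"api_key": "..."} if the remote model requires one
)
response = agent.act(**options)
print(f"Response: {response}")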

Source code in mbodied/agents/cli.py
@cli.command("auto")
@click.argument("task")
@click.option("--image-path", default=None, help="Optional path to the image file (for sense tasks).")
@click.option("--model-src", default="openai", help="Model source for agent")
@click.option("--api-key", default=None, help="API key for the remote model, if applicable.")
@click.option("--params", type=str, help="JSON string with parameters for the agent.")
@click.pass_context
def auto(ctx, task, image_path, model_src, api_key, params):
    r"""Dynamically select and run the correct agent based on the task.

    Example command:
        mbodied auto language --params "{\"instruction\": \"Tell me a math joke?\"}"

    Response:
        Why was the equal sign so humble?
        Because it knew it wasn't less than or greater than anyone else!

    Example command:
        mbodied auto motion-openvla --params "{\"instruction\": \"Move forward\", \"image\": \"resources/bridge_example.jpeg\"}" --model-src "https://api.mbodi.ai/community-models/"

    Response:
        Response: HandControl(pose={'x': -0.00960310545, 'y': -0.0111081966, 'z': -0.00206002074, 'roll': 0.0126330038, 'pitch': -0.000780597846, 'yaw': -0.0177964902}, grasp={'value': 0.996078431})

    Inputs:
        [task]: Task to be executed by the agent. Choices include:
            - language: Run language-related tasks.
            - motion-openvla: Use the OpenVlaAgent to generate robot motion.
            - sense-object-detection: Run object detection tasks.
            - sense-image-segmentation: Run image segmentation tasks.
            - sense-depth-estimation: Run depth estimation tasks.

        [image-path]: (Optional) Path to an image file, required for sense and motion tasks.
        [model-src]: The source of the model, e.g., "openai", "gradio", etc.
        [api-key]: (Optional) API key for accessing the remote model.
        [params]: The parameters for the agent.

    Outputs:
        [Response]: The output generated by the selected agent based on the task, such as HandControl for motion or detected objects for sensing tasks.
    """
    verbose = ctx.obj['VERBOSE']
    dry_run = ctx.obj['DRY_RUN']

    if verbose:
        print(f"Executing 'auto' command with task: {task}")

    if dry_run:
        print(f"Dry run: Would execute 'auto' with task: {task}")
        return
    AutoAgent = smart_import("mbodied.agents.auto.AutoAgent")
    Image = smart_import("mbodied.types.sense.Image")
    if params:
        try:
            options = json.loads(params)
        except json.JSONDecodeError:
            print("Invalid JSON format for parameters.")
            return

    else:
        options = {}
    if "image" not in options:
        image = Image(image_path) if image_path else None
        options["image"] = image
    else:
        options["image"] = Image(options["image"])
    model_kwargs = {"api_key": api_key} if api_key else {}
    kwargs = options
    auto_agent = AutoAgent(task=task, model_src=model_src, model_kwargs=model_kwargs)

    response = auto_agent.act(**kwargs)
    if verbose:
        print(f"[Verbose] Auto agent response: {response}")
    print(f"Response: {response}")

cli(ctx, verbose, dry_run, list, help)

CLI for various AI agents.

Source code in mbodied/agents/cli.py
@click.group(invoke_without_command=True)
@click.option("--verbose", "-v", is_flag=True, help="Enable verbose output.")
@click.option("--dry-run", is_flag=True, help="Simulate the action without executing.")
@click.option("--list", "-l", is_flag=True, help="List available agents.")
@click.option("--help", "-h", is_flag=True, help="Show this message and exit.")
@click.pass_context
def cli(ctx: click.Context, verbose, dry_run, list, help) -> None:
    """CLI for various AI agents."""
    if verbose:
        print("Verbose mode enabled.")
    if dry_run:
        print("Dry run mode enabled.")
    if list:
        list_agents(verbose)
    if not ctx.invoked_subcommand or help:
        ctx.get_help()

detect_objects(ctx, image_filename, model_src, objects, model_type, api_name, list)

Run the ObjectDetectionAgent to detect objects in an image.

Example command

mbodied sense detect resources/color_image.png --objects "remote, spoon" --model-type "YOLOWorld"

Response

Annotated Image: The image with detected objects highlighted and labeled.

Inputs

[image_filename]: Path to the image file.
[objects]: Comma-separated list of objects to detect (e.g., "car, person").
[model_type]: Model type to use for detection (e.g., "YOLOWorld", "Grounding DINO").

Outputs

[Annotated Image]: Display of the image with detected objects and their bounding boxes.

API documentation: https://api.mbodi.ai/sense/
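
For reference, the command above maps to a direct ObjectDetectionAgent call. A minimal sketch, assuming the smart_import paths in the source below resolve as ordinary imports:

# Illustrative equivalent of `mbodied sense detect ...`.
from mbodied.agents.sense import ObjectDetectionAgent
from mbodied.types.sense import Image

image = Image("resources/color_image.png", size=(224, 224))  # the CLI resizes to 224x224
agent = ObjectDetectionAgent(model_src="https://api.mbodi.ai/sense/")
result = agent.act(
    image=image,
    objects=["remote", "spoon"],  # the CLI splits the --objects string on commas
    model_type="YOLOWorld",
    api_name="/detect",
)
result.annotated.pil.show()  # display the annotated image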

Source code in mbodied/agents/cli.py
@sense.command("detect")
@click.argument("image_filename", required=False)
@click.option("--model-src", default="https://api.mbodi.ai/sense/", help="The model source URL.")
@click.option(
    "--objects", prompt=False, help="Comma-separated list of objects to detect."
)
@click.option(
    "--model-type",
    type=click.Choice(["YOLOWorld", "Grounding DINO"], case_sensitive=False),
    prompt=False,
    help="The model type to use for detection.",
)
@click.option("--api-name", default="/detect", help="The API endpoint to use.")
@click.option("--list", "-l", is_flag=True, help="List available models for object detection.")
@click.pass_context
def detect_objects(ctx, image_filename, model_src, objects, model_type, api_name, list) -> None:
    """Run the ObjectDetectionAgent to detect objects in an image.

    Example command:
        mbodied sense detect resources/color_image.png --objects "remote, spoon" --model-type "YOLOWorld"

    Response:
        Annotated Image: The image with detected objects highlighted and labeled.

    Inputs:
        [image_filename]: Path to the image file.
        [objects]: Comma-separated list of objects to detect (e.g., "car, person").
        [model_type]: Model type to use for detection (e.g., "YOLOWorld", "Grounding DINO").

    Outputs:
        [Annotated Image]: Display of the image with detected objects and their bounding boxes.

    API documentation: https://api.mbodi.ai/sense/
    """
    verbose = ctx.obj['VERBOSE']
    dry_run = ctx.obj['DRY_RUN']

    if list:
        print("Available Object Detection Models:")
        print("- Grounding DINO")
        print("- YOLOWorld")
        return

    if image_filename is None:
        print("Error: Missing argument 'IMAGE_FILENAME'. Specify an image filename")
        return

    if objects is None:
        objects = click.prompt(
            "Objects to detect (comma-separated)"
        )

    if model_type is None:
        model_type = click.prompt(
            "Model Type", 
            type=click.Choice(["YOLOWorld", "Grounding DINO"], case_sensitive=False)
        )

    if verbose:
        print(f"Running object detection on {image_filename} using {model_type}")

    if dry_run:
        print(f"Dry run: Would detect objects in {image_filename} with model: {model_type}, objects: {objects}")
        return
    Image = smart_import("mbodied.types.sense.Image")
    ObjectDetectionAgent = smart_import("mbodied.agents.sense.ObjectDetectionAgent")
    image = Image(image_filename, size=(224, 224))
    objects_list = objects.split(",")
    agent: "ObjectDetectionAgent" = ObjectDetectionAgent(model_src=model_src)
    result = agent.act(image=image, objects=objects_list, model_type=model_type, api_name=api_name)
    if verbose:
        print("Displaying annotated image.")
    result.annotated.pil.show()

estimate_depth(ctx, image_filename, model_src, api_name, list)

Run the DepthEstimationAgent to estimate depth from an image.

Example command

mbodied sense depth path/to/image.png

Response

Depth map image displaying the estimated depth information for each pixel.

Inputs

[image_filename]: Path to the image file (e.g., PNG or RGBD image).

Outputs

[Depth Estimation Response]: A depth map image representing the depth information in the image.

Loaded as API: https://api.mbodi.ai/sense/depth
API Endpoint: /depth
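
Programmatically, the command corresponds to a direct DepthEstimationAgent call. A minimal sketch, assuming the smart_import paths in the source below resolve as ordinary imports:

# Illustrative equivalent of `mbodied sense depth ...`.
from mbodied.agents.sense import DepthEstimationAgent
from mbodied.types.sense import Image

image = Image("path/to/image.png", size=(224, 224))  # the CLI resizes to 224x224
agent = DepthEstimationAgent(model_src="https://api.mbodi.ai/sense/")
depth = agent.act(image=image, api_name="/depth")
depth.pil.show()  # display the estimated depth map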

Source code in mbodied/agents/cli.py
@sense.command("depth")
@click.argument("image_filename", required=False)
@click.option("--model-src", default="https://api.mbodi.ai/sense/", help="The model source URL.")
@click.option("--api-name", default="/depth", help="The API endpoint to use.")
@click.option("--list", "-l", is_flag=True, help="List available models for depth estimation.")
@click.pass_context
def estimate_depth(ctx, image_filename, model_src, api_name, list) -> None:
    """Run the DepthEstimationAgent to estimate depth from an image.

    Example command:
        mbodied sense depth path/to/image.png

    Response:
        Depth map image displaying the estimated depth information for each pixel.

    Inputs:
        [image_filename]: Path to the image file (e.g., PNG or RGBD image).

    Outputs:
        [Depth Estimation Response]: A depth map image representing the depth information in the image.

    Loaded as API: [https://api.mbodi.ai/sense/depth](https://api.mbodi.ai/sense/depth)
    API Endpoint: [/depth](https://api.mbodi.ai/sense/depth)
    """
    verbose = ctx.obj['VERBOSE']
    dry_run = ctx.obj['DRY_RUN']

    if list:
        print("Available Depth Estimation Models:")
        print("- Depth Anything")
        print("- Zoe Depth")
        ctx.exit()

    if image_filename is None:
        print("Error: Missing argument 'IMAGE_FILENAME'. Specify an image filename")
        return

    if verbose:
        print(f"Running depth estimation on {image_filename}")

    if dry_run:
        print(f"Dry run: Would estimate from image in {image_filename}")
        return
    Image = smart_import("mbodied.types.sense.Image")
    DepthEstimationAgent = smart_import("mbodied.agents.sense.DepthEstimationAgent")
    image = Image(image_filename, size=(224, 224))
    agent: "DepthEstimationAgent" = DepthEstimationAgent(model_src=model_src)
    result = agent.act(image=image, api_name=api_name)
    result.pil.show()

language_chat(ctx, model_src, api_key, context, instruction, image_path, loop)

Run the LanguageAgent to interact with users using natural language.

Example command

mbodied language --instruction "What type of robot is this?" --image-path resources/color_image.png

Response

This is a robotic arm, specifically a PR2 (Personal Robot 2) developed by Willow Garage.

Inputs

[model_src]: The model source for the LanguageAgent (e.g., openai, anthropic, or a gradio URL).
[api_key]: Optional API key for the remote actor, if needed.
[context]: Starting context for the conversation (optional).
[instruction]: Instruction or query for the LanguageAgent to respond to.
[image_path]: Optional path to an image file to include as part of the input.
[loop]: If set, the agent will continue running and accepting new instructions.

Outputs

[Response]: The natural language response generated by the LanguageAgent.
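
A single-turn interaction can also be scripted directly. The sketch below is illustrative and assumes the smart_import paths in the source below resolve as ordinary imports:

# Illustrative equivalent of a single `mbodied language ...` invocation.
from mbodied.agents.language import LanguageAgent
from mbodied.types.sense import Image

agent = LanguageAgent(model_src="openai", api_key=None, context=None)
image = Image("resources/color_image.png")  # optional; omit for text-only queries
response = agent.act(
    instruction="What type of robot is this?",
    image=image,
    context=None,
)
print("Response:", response)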

Source code in mbodied/agents/cli.py
@cli.command("language")
@click.option(
    "--model-src",
    default="openai",
    help="The model source for the LanguageAgent. i.e. openai, anthropic, gradio url, etc",
)
@click.option("--api-key", default=None, help="API key for the remote actor (if applicable).")
@click.option("--context", default=None, help="Starting context for the conversation.")
@click.option("--instruction", prompt="Instruction", help="Instruction for the LanguageAgent.")
@click.option("--image-path", default=None, help="Optional path to the image file.")
@click.option("--loop", is_flag=True, help="Keep the agent running for multiple instructions.")
@click.pass_context
def language_chat(ctx, model_src, api_key, context, instruction, image_path, loop) -> None:
    """Run the LanguageAgent to interact with users using natural language.

    Example command:
        mbodied language --instruction "What type of robot is this?" --image-path resources/color_image.png

    Response:
        This is a robotic arm, specifically a PR2 (Personal Robot 2) developed by Willow Garage.

    Inputs:
        [model_src]: The model source for the LanguageAgent (e.g., openai, anthropic, or a gradio URL).
        [api_key]: Optional API key for the remote actor, if needed.
        [context]: Starting context for the conversation (optional).
        [instruction]: Instruction or query for the LanguageAgent to respond to.
        [image_path]: Optional path to an image file to include as part of the input.
        [loop]: If set, the agent will continue running and accepting new instructions.

    Outputs:
        [Response]: The natural language response generated by the LanguageAgent.
    """
    verbose = ctx.obj['VERBOSE']
    dry_run = ctx.obj['DRY_RUN']
    LanguageAgent: "LanguageAgent" = smart_import("mbodied.agents.language.LanguageAgent") # type: ignore # noqa
    Image = smart_import("mbodied.types.sense.Image") # type: ignore # noqa
    if verbose:
        print(f"Running language agent from {model_src}")

    if dry_run:
        print(f"Dry run: Would run LanguageAgent with model: {model_src}, instruction: {instruction}")
        return

    agent: "LanguageAgent" = LanguageAgent(model_src=model_src, api_key=api_key, context=context)
    image = Image(image_path) if image_path else None
    while True:
        try:
            instruction = Prompt.ask("Enter instruction (or 'exit' to stop): ")
            response = agent.act(instruction=instruction, image=image, context=context)
            print("Response:", response)

            if instruction.lower() == "exit" or not loop:
                break
        except KeyboardInterrupt:
            print("Interrupted.")
            break

list_agents(verbose)

List available agents.

Source code in mbodied/agents/cli.py
def list_agents(verbose) -> None:
    """List available agents."""
    import inspect
    import sys

    from rich.table import Table

    for mode in ["language", "sense", "motion"]:
        table = Table(title=f"{mode.capitalize()} Agents")
        table.add_column("Agent Name", style="bold cyan")
        table.add_column("Description", style="blue")

        smart_import(f"mbodied.agents.{mode}")
        seen = set()
        for agent in inspect.getmembers(sys.modules[f"mbodied.agents.{mode}"], inspect.isclass):
            if agent[0].endswith("Agent") and agent[0] not in seen:
                description = inspect.getdoc(agent[1])[:100] if inspect.getdoc(agent[1]) else "No description available."
                if verbose:
                    description = Markdown("""```python\n""" + inspect.getdoc(agent[1]))
                table.add_row(agent[0], description)
                seen.add(agent[0])

        console.print(table, overflow="ellipsis")
    console.print("\n")
    if not verbose:
        console.print("Hint: Rerun with `--verbose` to see full descriptions.")
    console.print(Markdown("For more information, run `mbodied [language | sense | motion] --help`."))
    console.print("\n")

motion(ctx, list)

Commands related to robot motion tasks.

Source code in mbodied/agents/cli.py
@cli.group(invoke_without_command=True)
@click.option("--list", "-l", is_flag=True, help="List available models for motion.")
@click.pass_context
def motion(ctx, list):
    """Commands related to robot motion tasks."""
    if list:
        print("Available Motion Models:")
        print("- OPENVLA MODEL")
        ctx.exit()

    if ctx.invoked_subcommand is None:
            print("No subcommand provided. Try 'mbodied motion --help' for help")

openvla_motion(ctx, instruction, image_filename, model_src, unnorm_key)

Run the OpenVlaAgent to generate robot motion based on instruction and image.

Example command

mbodied motion openvla resources/xarm.jpeg --instruction "move forward"

Response

Motion Response: HandControl(pose=Pose6D(x=-0.000432461563, y=0.000223397129, z=-0.000241243806, roll=-0.000138880808, pitch=0.00122899628, yaw=-6.67113405e-05), grasp=JointControl(value=0.996078431))

Inputs

[image_filename]: Path to the image file.
[instruction]: Instruction for the robot to act on.

Outputs

[Motion Response]: HandControl object containing pose and grasp information.

Loaded as API: https://api.mbodi.ai/community-models/
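
For reference, the command maps to a direct OpenVlaAgent call. A minimal sketch, assuming the smart_import paths in the source below resolve as ordinary imports:

# Illustrative equivalent of `mbodied motion openvla ...`.
from mbodied.agents.motion import OpenVlaAgent
from mbodied.types.sense import Image

image = Image("resources/xarm.jpeg", size=(224, 224))  # the CLI resizes to 224x224
agent = OpenVlaAgent(model_src="https://api.mbodi.ai/community-models/")
motion = agent.act(instruction="move forward", image=image, unnorm_key="bridge_orig")
print("Motion Response:", motion.flatten())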

Source code in mbodied/agents/cli.py
@motion.command("openvla")
@click.argument("image_filename")
@click.option("--instruction", prompt="Instruction", help="Instruction for the OpenVlaAgent.")
@click.option("--model-src", default="https://api.mbodi.ai/community-models/", help="The model source URL.")
@click.option("--unnorm-key", default="bridge_orig", help="Key for the unnormalized image.")
@click.pass_context
def openvla_motion(ctx, instruction, image_filename, model_src, unnorm_key) -> None:
    """Run the OpenVlaAgent to generate robot motion based on instruction and image.

    Example command:
        mbodied motion openvla resources/xarm.jpeg --instruction "move forward"

    Response:
        Motion Response:
        HandControl(
            pose=Pose6D(
                x=-0.000432461563,
                y=0.000223397129,
                z=-0.000241243806,
                roll=-0.000138880808,
                pitch=0.00122899628,
                yaw=-6.67113405e-05
            ),
            grasp=JointControl(value=0.996078431)
        )

    Inputs:
        [image_filename]: Path to the image file.
        [instruction]: Instruction for the robot to act on.

    Outputs:
        [Motion Response]: HandControl object containing pose and grasp information.
    Loaded as API: [https://api.mbodi.ai/community-models/](https://api.mbodi.ai/community-models/)
    """
    verbose = ctx.obj['VERBOSE']
    dry_run = ctx.obj['DRY_RUN']

    if verbose:
        print(f"Running OpenVLA motion agent on {image_filename} with instruction: {instruction}")

    if dry_run:
        print(f"Dry run: Would generate robot motion from {image_filename} with instruction: {instruction}")
        return
    Image = smart_import("mbodied.types.sense.Image")
    OpenVlaAgent = smart_import("mbodied.agents.motion.OpenVlaAgent")

    image = Image(image_filename, size=(224, 224))
    agent = OpenVlaAgent(model_src=model_src)
    motion_response = agent.act(instruction=instruction, image=image, unnorm_key=unnorm_key)

    print("Motion Response:", motion_response.flatten())

segment(ctx, image_filename, model_src, segment_type, segment_input, api_name, list)

Run the SegmentationAgent to segment objects in an image.

Example command

mbodied sense segment resources/color_image.png --segment-type "bbox" --segment-input "50,50,150,150"

Response

Masks shape: (1, 720, 1280)

Inputs

[image_filename]: Path to the image file.
[segment-type]: The type of segmentation input, either bbox for bounding box or coords for pixel coordinates.
[segment-input]: The input data, either bounding box coordinates as x1,y1,x2,y2 or pixel coordinates as u,v.

Outputs

[Masks]: A 2D mask indicating the segmented region in the image.

Loaded as API: https://api.mbodi.ai/sense/segment
API Endpoint: /segment
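
The equivalent SegmentationAgent call looks roughly as follows; this sketch is illustrative and assumes the smart_import paths in the source below resolve as ordinary imports:

# Illustrative equivalent of `mbodied sense segment ... --segment-type bbox`.
from mbodied.agents.sense import SegmentationAgent
from mbodied.types.geometry import BBox2D, PixelCoords
from mbodied.types.sense import Image

image = Image("resources/color_image.png", size=(224, 224))  # the CLI resizes to 224x224
agent = SegmentationAgent(model_src="https://api.mbodi.ai/sense/")

input_data = BBox2D(x1=50, y1=50, x2=150, y2=150)  # --segment-type bbox
# input_data = PixelCoords(u=100, v=100)           # --segment-type coords

mask_image, masks = agent.act(image=image, input_data=input_data, api_name="/segment")
print("Masks shape:", masks.shape)
mask_image.pil.show()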

Source code in mbodied/agents/cli.py
@sense.command("segment")
@click.argument("image_filename", required=False)
@click.option("--model-src", default="https://api.mbodi.ai/sense/", help="The model source URL.")
@click.option(
    "--segment-type",
    type=click.Choice(["bbox", "coords"], case_sensitive=False),
    prompt=False,
    help="Type of input data `bbox` (bounding box) or `coords` (pixel coordinates).",
)
@click.option(
    "--segment-input",
    prompt=False,
    help="Bounding box coordinates as x1,y1,x2,y2 or pixel coordinates as u,v.",
)
@click.option("--api-name", default="/segment", help="The API endpoint to use.")
@click.option("--list", "-l", is_flag=True, help="List available models for segmentation.")
@click.pass_context
def segment(ctx, image_filename, model_src, segment_type, segment_input, api_name, list) -> None:
    """Run the SegmentationAgent to segment objects in an image.

    Example command:
        mbodied sense segment resources/color_image.png --segment-type "bbox" --segment-input "50,50,150,150"

    Response:
        Masks shape:
        (1, 720, 1280)

    Inputs:
        [image_filename]: Path to the image file.
        [segment-type]: The type of segmentation input, either `bbox` for bounding box or `coords` for pixel coordinates.
        [segment-input]: The input data, either bounding box coordinates as `x1,y1,x2,y2` or pixel coordinates as `u,v`.

    Outputs:
        [Masks]: A 2D mask indicating the segmented region in the image.

    Loaded as API: [https://api.mbodi.ai/sense/segment](https://api.mbodi.ai/sense/segment)
    API Endpoint: [/segment](https://api.mbodi.ai/sense/segment)
    """
    verbose = ctx.obj['VERBOSE']
    dry_run = ctx.obj['DRY_RUN']

    if list:
        print("Available Segmentation Models:")
        print("- Segment Anything(SAM2)")
        ctx.exit()

    if image_filename is None:
        print("Error: Missing argument 'IMAGE_FILENAME'. Specify an image filename")
        return

    if segment_type is None:
        segment_type = click.prompt(
            "Input type - bounding box or pixel coordinates", 
            type=click.Choice(["bbox", "coords"], case_sensitive=False)
        )

    if segment_input is None:
        segment_input = click.prompt(
            "Segment input data - x1,y1,x2,y2 (for bbox) or u,v (for coords)"
        )

    if verbose:
        print(f"Running segmentation agent on {image_filename} to segment {segment_input}")

    if dry_run:
        print(f"Dry run: Would segment objects in {image_filename}")
        return
    Image = smart_import("mbodied.types.sense.Image")
    SegmentationAgent = smart_import("mbodied.agents.sense.SegmentationAgent")
    BBox2D = smart_import("mbodied.types.geometry.BBox2D")
    PixelCoords = smart_import("mbodied.types.geometry.PixelCoords")
    image = Image(image_filename, size=(224, 224))
    agent = SegmentationAgent(model_src=model_src)

    if segment_type == "bbox":
        bbox_coords = list(map(int, segment_input.split(",")))
        input_data = BBox2D(x1=bbox_coords[0], y1=bbox_coords[1], x2=bbox_coords[2], y2=bbox_coords[3])
    elif segment_type == "coords":
        u, v = map(int, segment_input.split(","))
        input_data = PixelCoords(u=u, v=v)

    mask_image, masks = agent.act(image=image, input_data=input_data, api_name=api_name)
    print("Masks shape:", masks.shape)
    mask_image.pil.show()

sense(ctx, list)

Commands related to sensing tasks (detection, segmentation, depth estimation).

Source code in mbodied/agents/cli.py
@cli.group(invoke_without_command=True)
@click.option("--list", "-l", is_flag=True, help="List available sensory models.")
@click.pass_context
def sense(ctx, list):
    """Commands related to sensing tasks (detection, segmentation, depth estimation)."""
    if list:
        print("Available Sensory Models:")
        print("- Object Detection Models:")
        print("  - Grounding DINO")
        print("  - YOLOWorld")
        print("- Depth Estimation Models:")
        print("  - Depth Anything")
        print("  - Zoe Depth")
        print("- Segmentation Models:")
        print("  - Segment Anything(SAM2)")
        ctx.exit()

    if ctx.invoked_subcommand is None:
            print("No subcommand provided. Try 'mbodied sense --help' for help")

version()

Display the version of mbodied.

Source code in mbodied/agents/cli.py
@cli.command("version")
def version():
    """Display the version of mbodied."""
    print(f"mbodied version: {__version__}")