Skip to content

Commit 12d4d11

Browse files
implement operator
1 parent 31c4877 commit 12d4d11

File tree

2 files changed

+237
-12
lines changed

2 files changed

+237
-12
lines changed

__init__.py

+235
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,235 @@
1+
import os
2+
3+
os.environ['FIFTYONE_ALLOW_LEGACY_ORCHESTRATORS'] = 'true'
4+
5+
import requests
6+
7+
from fiftyone.core.utils import add_sys_path
8+
import fiftyone.operators as foo
9+
from fiftyone.operators import types
10+
11+
with add_sys_path(os.path.dirname(os.path.abspath(__file__))):
12+
from moondream import (
13+
run_moondream_model
14+
)
15+
16+
def _handle_calling(
    uri,
    sample_collection,
    model_name,
    bbox_field,
    output_field,
    confidence_threshold,
    delegate=False
):
    """Execute the operator identified by ``uri`` on ``sample_collection``.

    The collection is passed as the ``dataset`` entry of the execution
    context; every other argument is forwarded as an operator parameter
    under its own name.

    NOTE(review): the keys sent here (``model_name``, ``bbox_field``,
    ``confidence_threshold``) are not the keys that
    ``MoondreamOperator.execute`` reads (``revision``, ``operation``) --
    confirm which operator this helper is meant to drive.

    Args:
        uri: URI of the operator to execute
        sample_collection: collection to run the operator on
        model_name: forwarded as the ``model_name`` parameter
        bbox_field: forwarded as the ``bbox_field`` parameter
        output_field: forwarded as the ``output_field`` parameter
        confidence_threshold: forwarded as the ``confidence_threshold``
            parameter
        delegate: forwarded as the ``delegate`` parameter

    Returns:
        whatever ``foo.execute_operator`` returns
    """
    operator_params = {
        "model_name": model_name,
        "bbox_field": bbox_field,
        "output_field": output_field,
        "confidence_threshold": confidence_threshold,
        "delegate": delegate,
    }
    execution_ctx = {"dataset": sample_collection}
    return foo.execute_operator(uri, execution_ctx, params=operator_params)
35+
36+
# Supported Moondream2 tasks, keyed by task name with a human-readable
# description as the value.
MOONDREAM_MODES = dict(
    caption="Caption images",
    query="Visual question answering",
    detect="Object detection",
    point="Apply point on object",
)


# Text file on Hugging Face that lists the available Moondream2 revisions
MOONDREAM_VERSIONS_URL = (
    "https://huggingface.co/vikhyatk/moondream2/raw/main/versions.txt"
)
46+
47+
def load_moondream_versions(timeout=10):
    """Fetch the available Moondream2 revisions from Hugging Face.

    Downloads ``MOONDREAM_VERSIONS_URL`` and returns its non-empty lines
    with surrounding whitespace stripped.

    Args:
        timeout: seconds to wait for the server before giving up

    Returns:
        a non-empty list of revision strings; ``["2025-01-09"]`` when the
        download fails or the file lists no revisions
    """
    fallback = ["2025-01-09"]
    try:
        # This runs at import time: without a timeout an unresponsive
        # server would block loading of the whole plugin.
        response = requests.get(MOONDREAM_VERSIONS_URL, timeout=timeout)
        response.raise_for_status()
        # Split by newlines and remove any empty strings
        versions = [
            line.strip() for line in response.text.splitlines() if line.strip()
        ]
    except Exception as e:
        # Deliberately broad: a failed lookup must never prevent the
        # module from importing, so fall back to the default revision.
        print(f"Failed to load Moondream versions: {e}")
        return fallback

    # An empty file would otherwise leave callers with no revisions at all
    return versions or fallback


# Load available versions
MOONDREAM_REVISIONS = load_moondream_versions()
60+
61+
class MoondreamOperator(foo.Operator):
    """Operator that runs Moondream2 on the current target view.

    The user picks a model revision, a task (one of the keys of
    ``MOONDREAM_MODES``), any task-specific input and an output field;
    ``execute()`` then hands these to ``run_moondream_model``.
    """

    @property
    def config(self):
        """Return the operator's name, label, description and icon."""
        return foo.OperatorConfig(
            name="moondream",
            label="Run Moondream2",
            description="Run the Moondream model on your Dataset!",
            dynamic=True,
            icon="/assets/moon-phase-svgrepo-com.svg",
        )

    def resolve_input(self, ctx):
        """Implement this method to collect user inputs as parameters
        that are stored in `ctx.params`.

        The form is dynamic: the task-specific field (`length`,
        `query_text` or `object_type`) is only added once the matching
        `operation` has been selected.

        Returns:
            a `types.Property` defining the form's components
        """
        inputs = types.Object()

        # Offer every revision that was loaded at import time. Without
        # these choices the dropdown (and the enum's values) is empty.
        revision_dropdown = types.Dropdown(label="Which revision would you like to use?")
        for revision in MOONDREAM_REVISIONS:
            revision_dropdown.add_choice(revision, label=revision)

        # Only preselect the default revision when it is actually offered
        default_revision = "2025-01-09"
        if default_revision not in MOONDREAM_REVISIONS:
            default_revision = None

        inputs.enum(
            "revision",
            values=revision_dropdown.values(),
            label="Revision",
            default=default_revision,
            description="Select from one of the available revisions. Note: The model weights will be downloaded from Hugging Face.",
            view=revision_dropdown,
            required=False
        )

        # The choice *value* must be the task key ("caption", "query", ...)
        # because that is what the comparisons below and in `execute()`
        # test against; the description is only the display label.
        mode_dropdown = types.Dropdown(label="What would you like to use Moondream2 for?")
        for mode_key, mode_label in MOONDREAM_MODES.items():
            mode_dropdown.add_choice(mode_key, label=mode_label)

        inputs.enum(
            "operation",
            values=mode_dropdown.values(),
            label="Moondream2 Tasks",
            description="Select from one of the supported tasks.",
            view=mode_dropdown,
            required=True
        )

        chosen_task = ctx.params.get("operation")

        if chosen_task == "caption":
            length_radio = types.RadioGroup()
            length_radio.add_choice("short", label="A short caption")
            length_radio.add_choice("normal", label="A more descriptive caption")

            inputs.enum(
                "length",
                # `values` is a required argument of `enum()`
                values=length_radio.values(),
                label="Caption Length",
                description="Which caption type would you like?",
                required=True,
                view=length_radio
            )
        elif chosen_task == "query":
            inputs.str(
                "query_text",
                label="Query",
                description="What's your query?",
                required=True,
            )
        elif chosen_task == "detect":
            inputs.str(
                "object_type",
                label="Detect",
                description="What do you want to detect? Currently this model only supports passing one object.",
                required=True,
            )
        elif chosen_task == "point":
            inputs.str(
                "object_type",
                label="Point",
                description="What do you want to place a point on? Currently this model only supports passing one object",
                required=True,
            )

        inputs.str(
            "output_field",
            required=True,
            label="Output Field",
            description="Name of the field to store the results in."
        )

        inputs.bool(
            "delegate",
            default=False,
            required=True,
            label="Delegate execution?",
            description=("If you choose to delegate this operation you must first have a delegated service running. "
            "You can launch a delegated service by running `fiftyone delegated launch` in your terminal"),
            view=types.CheckboxView(),
        )

        inputs.view_target(ctx)

        return types.Property(inputs)

    def resolve_delegation(self, ctx):
        """Implement this method if you want to programmatically *force*
        this operation to be delegated or executed immediately.

        Returns:
            the value of the `delegate` parameter, or False when it is
            not set
        """
        return ctx.params.get("delegate", False)

    def execute(self, ctx):
        """Executes the actual operation based on the hydrated `ctx`.

        Reads `revision`, `operation` and `output_field` from
        `ctx.params`, adds the task-specific parameter, runs
        `run_moondream_model` on the target view and reloads the dataset.

        Returns:
            None
        """
        view = ctx.target_view()
        revision = ctx.params.get("revision")
        operation = ctx.params.get("operation")
        output_field = ctx.params.get("output_field")

        # Create kwargs dictionary with additional parameters based on operation
        kwargs = {}
        if operation == "caption":
            kwargs["length"] = ctx.params.get("length")
        elif operation == "query":
            kwargs["query_text"] = ctx.params.get("query_text")
        elif operation in ["detect", "point"]:
            kwargs["object_type"] = ctx.params.get("object_type")

        run_moondream_model(
            dataset=view,
            revision=revision,
            operation=operation,
            output_field=output_field,
            **kwargs
        )

        ctx.ops.reload_dataset()

    def __call__(
        self,
        sample_collection,
        revision,
        operation,
        output_field,
        delegate=False,
        **kwargs
    ):
        """Run this operator programmatically on `sample_collection`.

        The parameters are sent under the same keys that `execute()`
        reads from `ctx.params`.

        Args:
            sample_collection: the collection to process
            revision: Moondream2 revision to use
            operation: task to run; one of the keys of `MOONDREAM_MODES`
            output_field: name of the field to store the results in
            delegate: whether to request delegated execution
            **kwargs: task-specific parameters: `length` for "caption",
                `query_text` for "query", `object_type` for "detect"
                and "point"

        Returns:
            whatever `foo.execute_operator` returns
        """
        ctx = dict(dataset=sample_collection)
        params = dict(
            revision=revision,
            operation=operation,
            output_field=output_field,
            delegate=delegate,
            **kwargs
        )
        return foo.execute_operator(self.uri, ctx, params=params)
230+
231+
def register(p):
    """Plugin entry point: register each operator this plugin defines.

    Args:
        p: object whose ``register()`` method is called once per operator
    """
    plugin_operators = (MoondreamOperator,)
    for operator_cls in plugin_operators:
        p.register(operator_cls)

moondream.py

+2-12
Original file line numberDiff line numberDiff line change
@@ -44,18 +44,14 @@ class Moondream2(Model):
4444
4545
Args:
4646
operation (str): Type of operation to perform
47-
model_name (str): Name of the model to load from HuggingFace
4847
revision (str, optional): Model revision/tag to use
49-
device (str, optional): Device to run the model on ('cuda', 'mps', or 'cpu')
5048
**kwargs: Operation-specific parameters
5149
"""
5250

5351
def __init__(
5452
self,
5553
operation: str,
56-
model_name: str = "vikhyatk/moondream2",
5754
revision: Optional[str] = None,
58-
device: Optional[str] = None,
5955
**kwargs
6056
):
6157
self.operation = operation
@@ -73,11 +69,11 @@ def __init__(
7369
self.params = kwargs
7470

7571
# Set device
76-
self.device = device or get_device()
72+
self.device = get_device()
7773

7874
# Initialize model
7975
self.model = AutoModelForCausalLM.from_pretrained(
80-
model_name,
76+
"vikhyatk/moondream2",
8177
revision=revision,
8278
trust_remote_code=True,
8379
device_map={"": self.device},
@@ -233,9 +229,7 @@ def run_moondream_model(
233229
dataset: fo.Dataset,
234230
operation: str,
235231
output_field: str,
236-
model_name: str = "vikhyatk/moondream2",
237232
revision: Optional[str] = None,
238-
device: Optional[str] = None,
239233
**kwargs
240234
) -> None:
241235
"""Apply Moondream2 operations to a FiftyOne dataset.
@@ -244,16 +238,12 @@ def run_moondream_model(
244238
dataset: FiftyOne dataset to process
245239
operation: Type of operation to perform
246240
output_field: Field to store results in
247-
model_name: Name of the model to load from HuggingFace
248241
revision: Model revision/tag to use
249-
device: Device to run the model on
250242
**kwargs: Operation-specific parameters
251243
"""
252244
model = Moondream2(
253245
operation=operation,
254-
model_name=model_name,
255246
revision=revision,
256-
device=device,
257247
**kwargs
258248
)
259249
dataset.apply_model(model, label_field=output_field)

0 commit comments

Comments
 (0)