Skip to content

Commit 205d84a

Browse files
[VLM] Clean up models (#16873)
Signed-off-by: DarkLight1337 <[email protected]>
1 parent 5124f5b commit 205d84a

File tree

4 files changed

+2
-42
lines changed

4 files changed

+2
-42
lines changed

examples/offline_inference/mistral-small.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -62,6 +62,7 @@ def run_simple_demo(args: argparse.Namespace):
6262
tokenizer_mode="mistral" if args.format == "mistral" else "auto",
6363
config_format="mistral" if args.format == "mistral" else "auto",
6464
load_format="mistral" if args.format == "mistral" else "auto",
65+
limit_mm_per_prompt={"image": 1},
6566
max_model_len=4096,
6667
max_num_seqs=2,
6768
tensor_parallel_size=2,

examples/offline_inference/vision_language.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -957,7 +957,7 @@ def run_qwen2_5_omni(questions: list[str], modality: str):
957957
"max_pixels": 1280 * 28 * 28,
958958
"fps": [1],
959959
},
960-
disable_mm_preprocessor_cache=args.disable_mm_preprocessor_cache,
960+
limit_mm_per_prompt={"image": 1},
961961
)
962962

963963
if modality == "image":

vllm/model_executor/models/phi4mm.py

Lines changed: 0 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -503,26 +503,6 @@ def get_feature_extractor(self) -> SequenceFeatureExtractor:
503503
def get_supported_mm_limits(self) -> Mapping[str, Optional[int]]:
504504
return {"audio": None, "image": None}
505505

506-
def get_mm_max_tokens_per_item(
507-
self,
508-
seq_len: int,
509-
mm_counts: Mapping[str, int],
510-
) -> Mapping[str, int]:
511-
return {
512-
"image": self.get_max_image_tokens(),
513-
"audio": self.get_max_audio_tokens(),
514-
}
515-
516-
def get_max_audio_tokens(self) -> int:
517-
sr = self.get_feature_extractor().sampling_rate
518-
num_frames = self.get_audio_num_frames(_AUDIO_MAX_SOUNDFILE_SIZE, sr)
519-
return self._compute_audio_embed_size(num_frames)
520-
521-
def get_max_image_tokens(self) -> int:
522-
target_width, target_height = self.get_image_size_with_most_features()
523-
return self.get_num_image_tokens(image_width=target_width,
524-
image_height=target_height)
525-
526506
def _find_target_aspect_ratio(
527507
self,
528508
orig_width: int,
@@ -764,9 +744,6 @@ def get_dummy_mm_data(
764744
num_audios = mm_counts.get("audio", 0)
765745
num_images = mm_counts.get("image", 0)
766746

767-
target_width, target_height = \
768-
self.info.get_image_size_with_most_features()
769-
770747
target_width, target_height = \
771748
self.info.get_image_size_with_most_features()
772749

vllm/model_executor/models/qwen2_5_omni_thinker.py

Lines changed: 0 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -172,26 +172,9 @@ def get_feature_extractor(
172172
assert isinstance(feature_extractor, WhisperFeatureExtractor)
173173
return feature_extractor
174174

175-
def get_max_audio_tokens(self) -> int:
176-
hf_config = self.get_hf_config()
177-
max_source_position = hf_config.audio_config.max_source_positions
178-
output_lengths = (max_source_position - 2) // 2 + 1
179-
return output_lengths
180-
181175
def get_supported_mm_limits(self) -> Mapping[str, Optional[int]]:
182176
return {"audio": None, "image": None, "video": None}
183177

184-
def get_mm_max_tokens_per_item(
185-
self,
186-
seq_len: int,
187-
mm_counts: Mapping[str, int],
188-
) -> Mapping[str, int]:
189-
return {
190-
"audio": self.get_max_audio_tokens(),
191-
"image": self.get_max_image_tokens(),
192-
"video": self.get_max_video_tokens(seq_len, mm_counts),
193-
}
194-
195178

196179
class Qwen2_5OmniThinkerDummyInputsBuilder(
197180
BaseDummyInputsBuilder[Qwen2_5OmniThinkerProcessingInfo]):
@@ -210,7 +193,6 @@ def get_dummy_text(self, mm_counts: Mapping[str, int]) -> str:
210193
return (audio_token * num_audios + image_token * num_images +
211194
video_token * num_videos)
212195

213-
# TODO: @abstractmethod after transition
214196
def get_dummy_mm_data(
215197
self,
216198
seq_len: int,

0 commit comments

Comments
 (0)