Skip to content

Commit bcd4cf9

Browse files
committed
Move iter_audio to analyze.py
1 parent bfd5173 commit bcd4cf9

File tree

7 files changed

+109
-167
lines changed

7 files changed

+109
-167
lines changed

auto_editor/analyze.py

Lines changed: 93 additions & 88 deletions
Original file line numberDiff line numberDiff line change
@@ -4,15 +4,16 @@
44
import re
55
from dataclasses import dataclass
66
from fractions import Fraction
7+
from math import ceil
78
from typing import TYPE_CHECKING
89

910
import av
1011
import numpy as np
12+
from av.audio.fifo import AudioFifo
1113
from av.subtitles.subtitle import AssSubtitle
1214

1315
from auto_editor import version
1416
from auto_editor.utils.subtitle_tools import convert_ass_to_text
15-
from auto_editor.wavfile import read
1617

1718
if TYPE_CHECKING:
1819
from collections.abc import Iterator
@@ -22,15 +23,13 @@
2223
from numpy.typing import NDArray
2324

2425
from auto_editor.ffwrapper import FileInfo
25-
from auto_editor.output import Ensure
2626
from auto_editor.utils.bar import Bar
2727
from auto_editor.utils.log import Log
2828

2929

3030
@dataclass(slots=True)
3131
class FileSetup:
3232
src: FileInfo
33-
ensure: Ensure
3433
strict: bool
3534
tb: Fraction
3635
bar: Bar
@@ -89,6 +88,41 @@ def obj_tag(tag: str, tb: Fraction, obj: dict[str, Any]) -> str:
8988
return key
9089

9190

91+
def iter_audio(src, tb: Fraction, stream: int = 0) -> Iterator[float]:
    """Yield the peak absolute sample value for each tick of an audio stream.

    The stream is decoded, converted to the "flt" sample format, and buffered
    in a FIFO. Each time at least one tick's worth of samples is buffered, a
    chunk is read and its maximum absolute value is yielded.

    Args:
        src: Object exposing a ``path`` attribute that ``av.open`` can read.
        tb: Timebase; one tick spans ``sample_rate / tb`` samples.
        stream: Index into the container's audio streams.

    Yields:
        One float per tick: ``max(abs(samples))`` over that tick's chunk.

    Note:
        Samples still buffered after the last decoded frame that do not fill
        a whole tick are not yielded.
    """
    fifo = AudioFifo()

    # A context manager (rather than try/finally around the open call) makes
    # sure a failing av.open() surfaces its own exception instead of an
    # UnboundLocalError from cleanup code touching an unassigned `container`.
    # The container is still closed if the generator is abandoned early.
    with av.open(src.path, "r") as container:
        audio_stream = container.streams.audio[stream]
        sample_rate = audio_stream.rate

        # Samples per tick is generally not an integer. The rounding
        # remainder is carried forward so chunk boundaries do not drift.
        exact_size = (1 / tb) * sample_rate
        min_buffered = ceil(exact_size)  # Loop invariant, computed once.
        accumulated_error = 0

        # Resample so that audio data is between [-1, 1]
        resampler = av.AudioResampler(
            av.AudioFormat("flt"), audio_stream.layout, sample_rate
        )

        for frame in container.decode(audio=stream):
            frame.pts = None  # Skip time checks

            for reframe in resampler.resample(frame):
                fifo.write(reframe)

            while fifo.samples >= min_buffered:
                size_with_error = exact_size + accumulated_error
                current_size = round(size_with_error)
                accumulated_error = size_with_error - current_size

                audio_chunk = fifo.read(current_size)
                assert audio_chunk is not None
                arr = audio_chunk.to_ndarray().flatten()
                yield float(np.max(np.abs(arr)))
125+
92126
def iter_motion(src, tb, stream: int, blur: int, width: int) -> Iterator[float]:
93127
container = av.open(src.path, "r")
94128

@@ -138,7 +172,6 @@ def iter_motion(src, tb, stream: int, blur: int, width: int) -> Iterator[float]:
138172

139173
@dataclass(slots=True)
140174
class Levels:
141-
ensure: Ensure
142175
src: FileInfo
143176
tb: Fraction
144177
bar: Bar
@@ -151,24 +184,16 @@ def media_length(self) -> int:
151184
if (arr := self.read_cache("audio", {"stream": 0})) is not None:
152185
return len(arr)
153186

154-
sr, samples = read(self.ensure.audio(self.src, 0))
155-
samp_count = len(samples)
156-
del samples
157-
158-
samp_per_ticks = sr / self.tb
159-
ticks = int(samp_count / samp_per_ticks)
160-
self.log.debug(f"Audio Length: {ticks}")
161-
self.log.debug(
162-
f"... without rounding: {float(samp_count / samp_per_ticks)}"
163-
)
164-
return ticks
187+
result = sum(1 for _ in iter_audio(self.src, self.tb, 0))
188+
self.log.debug(f"Audio Length: {result}")
189+
return result
165190

166191
# If there's no audio, get length in video metadata.
167-
with av.open(f"{self.src.path}") as cn:
168-
if len(cn.streams.video) < 1:
192+
with av.open(self.src.path) as container:
193+
if len(container.streams.video) == 0:
169194
self.log.error("Could not get media duration")
170195

171-
video = cn.streams.video[0]
196+
video = container.streams.video[0]
172197

173198
if video.duration is None or video.time_base is None:
174199
dur = 0
@@ -213,56 +238,70 @@ def cache(self, tag: str, obj: dict[str, Any], arr: np.ndarray) -> np.ndarray:
213238
return arr
214239

215240
def audio(self, stream: int) -> NDArray[np.float64]:
    """Return the per-tick peak audio level of an audio stream.

    A cached array is returned when ``read_cache`` has one. Otherwise the
    values produced by ``iter_audio`` are collected into an array, which is
    stored through ``self.cache`` and returned.

    Args:
        stream: Index of the audio stream in ``self.src.audios``.

    Returns:
        A float64 array holding one value per tick yielded by ``iter_audio``.

    Raises:
        LevelError: If ``stream`` is not a valid audio stream index.
    """
    if stream >= len(self.src.audios):
        raise LevelError(f"audio: audio stream '{stream}' does not exist.")

    if (arr := self.read_cache("audio", {"stream": stream})) is not None:
        return arr

    # Estimate the tick count from metadata. It only sizes the progress bar
    # and the initial buffer, so it does not have to be exact.
    with av.open(self.src.path, "r") as container:
        audio = container.streams.audio[stream]
        if audio.duration is not None and audio.time_base is not None:
            inaccurate_dur = int(audio.duration * audio.time_base * self.tb)
        elif container.duration is not None:
            inaccurate_dur = int(container.duration / av.time_base * self.tb)
        else:
            inaccurate_dur = 1024

    # The buffer grows by doubling, and doubling a zero-length array never
    # grows it. Clamp so an estimate of 0 (or a negative one) cannot cause an
    # IndexError on the first value or a ValueError in np.zeros.
    inaccurate_dur = max(1, inaccurate_dur)

    bar = self.bar
    bar.start(inaccurate_dur, "Analyzing audio volume")

    result = np.zeros((inaccurate_dur), dtype=np.float64)
    index = 0
    for value in iter_audio(self.src, self.tb, stream):
        if index >= len(result):
            # The estimate was too small: double the buffer.
            result = np.concatenate(
                (result, np.zeros((len(result)), dtype=np.float64))
            )
        result[index] = value
        bar.tick(index)
        index += 1

    bar.end()
    # Trim the unused tail of the over-allocated buffer before caching.
    return self.cache("audio", {"stream": stream}, result[:index])
248272

249-
threshold_list = np.zeros((audio_ticks), dtype=np.float64)
273+
def motion(self, stream: int, blur: int, width: int) -> NDArray[np.float64]:
    """Return the per-tick motion level of a video stream.

    A cached array is returned when ``read_cache`` has one for the same
    stream, width and blur. Otherwise the values produced by ``iter_motion``
    are collected into an array, which is stored through ``self.cache`` and
    returned.

    Args:
        stream: Index of the video stream in ``self.src.videos``.
        blur: Passed through to ``iter_motion``; part of the cache key.
        width: Passed through to ``iter_motion``; part of the cache key.

    Returns:
        A float64 array holding one value per item yielded by ``iter_motion``.

    Raises:
        LevelError: If ``stream`` is not a valid video stream index.
    """
    if stream >= len(self.src.videos):
        raise LevelError(f"motion: video stream '{stream}' does not exist.")

    mobj = {"stream": stream, "width": width, "blur": blur}
    if (arr := self.read_cache("motion", mobj)) is not None:
        return arr

    # Estimate the tick count from metadata. It only sizes the progress bar
    # and the initial buffer, so it does not have to be exact.
    with av.open(self.src.path, "r") as container:
        video = container.streams.video[stream]
        inaccurate_dur = (
            1024
            if video.duration is None or video.time_base is None
            else int(video.duration * video.time_base * self.tb)
        )

    # The buffer grows by doubling, and doubling a zero-length array never
    # grows it. Clamp so an estimate of 0 (or a negative one) cannot cause an
    # IndexError on the first value or a ValueError in np.zeros.
    inaccurate_dur = max(1, inaccurate_dur)

    bar = self.bar
    bar.start(inaccurate_dur, "Analyzing motion")

    result = np.zeros((inaccurate_dur), dtype=np.float64)
    index = 0
    for value in iter_motion(self.src, self.tb, stream, blur, width):
        if index >= len(result):
            # The estimate was too small: double the buffer.
            result = np.concatenate(
                (result, np.zeros((len(result)), dtype=np.float64))
            )
        result[index] = value
        bar.tick(index)
        index += 1

    bar.end()
    # Trim the unused tail of the over-allocated buffer before caching.
    return self.cache("motion", mobj, result[:index])
266305

267306
def subtitle(
268307
self,
@@ -336,37 +375,3 @@ def subtitle(
336375
container.close()
337376

338377
return result
339-
340-
def motion(self, stream: int, blur: int, width: int) -> NDArray[np.float64]:
341-
if stream >= len(self.src.videos):
342-
raise LevelError(f"motion: video stream '{stream}' does not exist.")
343-
344-
mobj = {"stream": stream, "width": width, "blur": blur}
345-
if (arr := self.read_cache("motion", mobj)) is not None:
346-
return arr
347-
348-
with av.open(self.src.path, "r") as container:
349-
video = container.streams.video[stream]
350-
inaccurate_dur = (
351-
1024
352-
if video.duration is None or video.time_base is None
353-
else int(video.duration * video.time_base * self.tb)
354-
)
355-
356-
bar = self.bar
357-
bar.start(inaccurate_dur, "Analyzing motion")
358-
359-
threshold_list = np.zeros((inaccurate_dur), dtype=np.float64)
360-
index = 0
361-
362-
for value in iter_motion(self.src, self.tb, stream, blur, width):
363-
if index > len(threshold_list) - 1:
364-
threshold_list = np.concatenate(
365-
(threshold_list, np.zeros((len(threshold_list)), dtype=np.float64))
366-
)
367-
threshold_list[index] = value
368-
bar.tick(index)
369-
index += 1
370-
371-
bar.end()
372-
return self.cache("motion", mobj, threshold_list[:index])

auto_editor/edit.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -202,10 +202,8 @@ def edit_media(
202202
else:
203203
samplerate = args.sample_rate
204204

205-
ensure = Ensure(ffmpeg, bar, samplerate, temp, log)
206-
207205
if tl is None:
208-
tl = make_timeline(sources, ensure, args, samplerate, bar, temp, log)
206+
tl = make_timeline(sources, args, samplerate, bar, temp, log)
209207

210208
if export["export"] == "timeline":
211209
from auto_editor.formats.json import make_json_timeline
@@ -216,7 +214,7 @@ def edit_media(
216214
if args.preview:
217215
from auto_editor.preview import preview
218216

219-
preview(ensure, tl, temp, log)
217+
preview(tl, temp, log)
220218
return
221219

222220
if export["export"] == "json":
@@ -265,6 +263,8 @@ def make_media(tl: v3, output: str) -> None:
265263
sub_output = []
266264
apply_later = False
267265

266+
ensure = Ensure(ffmpeg, bar, samplerate, temp, log)
267+
268268
if ctr.default_sub != "none" and not args.sn:
269269
sub_output = make_new_subtitles(tl, ensure, temp)
270270

auto_editor/make_layers.py

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@
1818
if TYPE_CHECKING:
1919
from numpy.typing import NDArray
2020

21-
from auto_editor.output import Ensure
2221
from auto_editor.utils.bar import Bar
2322
from auto_editor.utils.chunks import Chunks
2423
from auto_editor.utils.log import Log
@@ -75,7 +74,6 @@ def make_av(src: FileInfo, all_clips: list[list[Clip]]) -> tuple[VSpace, ASpace]
7574
def run_interpreter_for_edit_option(
7675
text: str, filesetup: FileSetup
7776
) -> NDArray[np.bool_]:
78-
ensure = filesetup.ensure
7977
src = filesetup.src
8078
tb = filesetup.tb
8179
bar = filesetup.bar
@@ -87,8 +85,8 @@ def run_interpreter_for_edit_option(
8785
if log.is_debug:
8886
log.debug(f"edit: {parser}")
8987

90-
env["timebase"] = filesetup.tb
91-
env["@levels"] = Levels(ensure, src, tb, bar, temp, log)
88+
env["timebase"] = tb
89+
env["@levels"] = Levels(src, tb, bar, temp, log)
9290
env["@filesetup"] = filesetup
9391

9492
results = interpret(env, parser)
@@ -139,7 +137,6 @@ def parse_time(val: str, arr: NDArray, tb: Fraction) -> int: # raises: `CoerceE
139137

140138
def make_timeline(
141139
sources: list[FileInfo],
142-
ensure: Ensure,
143140
args: Args,
144141
sr: int,
145142
bar: Bar,
@@ -169,7 +166,7 @@ def make_timeline(
169166
concat = np.concatenate
170167

171168
for i, src in enumerate(sources):
172-
filesetup = FileSetup(src, ensure, len(sources) < 2, tb, bar, temp, log)
169+
filesetup = FileSetup(src, len(sources) < 2, tb, bar, temp, log)
173170

174171
edit_result = run_interpreter_for_edit_option(method, filesetup)
175172
mut_margin(edit_result, start_margin, end_margin)

auto_editor/preview.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@
66
from typing import TextIO
77

88
from auto_editor.analyze import Levels
9-
from auto_editor.output import Ensure
109
from auto_editor.timeline import v3
1110
from auto_editor.utils.bar import Bar
1211
from auto_editor.utils.func import to_timecode
@@ -49,7 +48,7 @@ def all_cuts(tl: v3, in_len: int) -> list[int]:
4948
return cut_lens
5049

5150

52-
def preview(ensure: Ensure, tl: v3, temp: str, log: Log) -> None:
51+
def preview(tl: v3, temp: str, log: Log) -> None:
5352
log.conwrite("")
5453
tb = tl.tb
5554

@@ -66,7 +65,7 @@ def preview(ensure: Ensure, tl: v3, temp: str, log: Log) -> None:
6665

6766
in_len = 0
6867
for src in all_sources:
69-
in_len += Levels(ensure, src, tb, Bar("none"), temp, log).media_length
68+
in_len += Levels(src, tb, Bar("none"), temp, log).media_length
7069

7170
out_len = tl.out_len()
7271

auto_editor/render/subtitle.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ def parse(self, text: str, codec: str) -> None:
4949
self.codec = codec
5050
self.contents = []
5151

52-
if codec == "ass":
52+
if codec == "ass" or codec == "ssa":
5353
time_code = re.compile(r"(.*)(\d+:\d+:[\d.]+)(.*)(\d+:\d+:[\d.]+)(.*)")
5454
elif codec == "webvtt":
5555
time_code = re.compile(r"()(\d+:[\d.]+)( --> )(\d+:[\d.]+)(\n.*)")

0 commit comments

Comments
 (0)