@@ -62,16 +62,16 @@ proc close(getter: Getter) =
62
62
avcodec_free_context(addr getter.decoderCtx)
63
63
getter.container.close()
64
64
65
- proc get(getter: Getter, start: int , endSample: int ): seq [seq [ int16 ] ] =
65
+ proc get(getter: Getter, start: int , endSample: int ): seq [int16 ] =
66
66
# start/end is in samples
67
67
let container = getter.container
68
68
let stream = getter.stream
69
69
let decoderCtx = getter.decoderCtx
70
70
71
71
let targetSamples = endSample - start
72
72
73
- # Initialize result with proper size (default zero-filled)
74
- result = @ [ newSeq[int16 ](targetSamples), newSeq[ int16 ](targetSamples)]
73
+ # Initialize result with proper size for interleaved stereo (default zero-filled)
74
+ result = newSeq[int16 ](targetSamples * 2 )
75
75
76
76
# Convert sample position to time and seek
77
77
let sampleRate = stream.codecpar.sample_rate
@@ -131,60 +131,50 @@ proc get(getter: Getter, start: int, endSample: int): seq[seq[int16]] =
131
131
let audioData = cast [ptr UncheckedArray[int16 ]](frame.data[0 ])
132
132
for i in 0 ..< samplesToProcess:
133
133
let frameIndex = samplesSkippedInFrame + i
134
- for ch in 0 ..< channels:
135
- result [ch][totalSamples + i] = audioData[frameIndex * channels + ch]
134
+ let resultIndex = (totalSamples + i) * 2 # Interleaved index
135
+ for ch in 0 ..< channels:
136
+ result [resultIndex + ch] = audioData[frameIndex * channels + ch]
136
137
137
138
elif frame.format == AV_SAMPLE_FMT_S16P.cint :
138
139
# Planar 16-bit
139
140
for i in 0 ..< samplesToProcess:
140
141
let frameIndex = samplesSkippedInFrame + i
141
- for ch in 0 ..< channels:
142
+ let resultIndex = (totalSamples + i) * 2 # Interleaved index
143
+ for ch in 0 ..< channels:
142
144
if frame.data[ch] != nil :
143
145
let channelData = cast [ptr UncheckedArray[int16 ]](frame.data[ch])
144
- result [ch][totalSamples + i ] = channelData[frameIndex]
146
+ result [resultIndex + ch ] = channelData[frameIndex]
145
147
146
148
elif frame.format == AV_SAMPLE_FMT_FLT.cint :
147
149
# Interleaved float
148
150
let audioData = cast [ptr UncheckedArray[cfloat ]](frame.data[0 ])
149
151
for i in 0 ..< samplesToProcess:
150
152
let frameIndex = samplesSkippedInFrame + i
151
- for ch in 0 ..< channels:
153
+ let resultIndex = (totalSamples + i) * 2 # Interleaved index
154
+ for ch in 0 ..< channels:
152
155
# Convert float to 16-bit int with proper clamping
153
156
let floatSample = audioData[frameIndex * channels + ch]
154
157
let clampedSample = max(- 1.0 , min(1.0 , floatSample))
155
- result [ch][totalSamples + i ] = int16 (clampedSample * 32767.0 )
158
+ result [resultIndex + ch ] = int16 (clampedSample * 32767.0 )
156
159
157
160
elif frame.format == AV_SAMPLE_FMT_FLTP.cint :
158
161
# Planar float
159
162
for i in 0 ..< samplesToProcess:
160
163
let frameIndex = samplesSkippedInFrame + i
161
- for ch in 0 ..< channels:
164
+ let resultIndex = (totalSamples + i) * 2 # Interleaved index
165
+ for ch in 0 ..< channels:
162
166
if frame.data[ch] != nil :
163
167
let channelData = cast [ptr UncheckedArray[cfloat ]](frame.data[ch])
164
168
# Convert float to 16-bit int with proper clamping
165
169
let floatSample = channelData[frameIndex]
166
170
let clampedSample = max(- 1.0 , min(1.0 , floatSample))
167
- result [ch][totalSamples + i ] = int16 (clampedSample * 32767.0 )
171
+ result [resultIndex + ch ] = int16 (clampedSample * 32767.0 )
168
172
else :
169
- # Unsupported format - samples already initialized to silence
170
- discard
173
+ error & " Unsupported audio format: { av_get_sample_fmt_name(frame.format)} "
171
174
172
175
totalSamples += samplesToProcess
173
176
samplesProcessed += samples
174
177
175
- # If we have mono input, duplicate to second channel
176
- if result .len >= 2 and result [0 ].len > 0 and result [1 ].len > 0 :
177
- # Check if second channel is all zeros (mono source)
178
- var isSecondChannelEmpty = true
179
- for i in 0 ..< min(1000 , result [1 ].len): # Check more samples for accuracy
180
- if result [1 ][i] != 0 :
181
- isSecondChannelEmpty = false
182
- break
183
-
184
- if isSecondChannelEmpty:
185
- # Copy first channel to second for stereo output using copyMem
186
- copyMem(addr result [1 ][0 ], addr result [0 ][0 ], result [0 ].len * sizeof(int16 ))
187
-
188
178
proc createAudioFilterGraph(clip: Clip, sr: int , layout: string ): (ptr AVFilterGraph,
189
179
ptr AVFilterContext, ptr AVFilterContext) =
190
180
var filterGraph: ptr AVFilterGraph = avfilter_graph_alloc()
@@ -251,17 +241,17 @@ proc createAudioFilterGraph(clip: Clip, sr: int, layout: string): (ptr AVFilterG
251
241
return (filterGraph, bufferSrc, bufferSink)
252
242
253
243
# Returns seq[int16] where channel data is interleaved: [L, R, L, R, L, R] etc.
254
- proc processAudioClip(clip: Clip, data: seq [seq [ int16 ] ], sourceSr: cint , targetSr: cint ): seq [int16 ] =
255
- if data.len == 0 or data[ 0 ].len == 0 :
244
+ proc processAudioClip(clip: Clip, data: seq [int16 ], sourceSr: cint , targetSr: cint ): seq [int16 ] =
245
+ if data.len == 0 :
256
246
return @ []
257
247
258
248
# First apply speed/volume processing at source sample rate (if needed)
259
249
var processedData = data
260
250
if clip.speed != 1.0 or clip.volume != 1.0 :
261
- let actualChannels = data.len
262
- let channels = if actualChannels == 1 : 1 else : 2
263
- let samples = data[ 0 ] .len
264
- let layout = ( if channels == 1 : " mono " else : " stereo" )
251
+ # Data is interleaved: [L, R, L, R, ...] so always stereo
252
+ let channels = 2
253
+ let samples = data.len div 2
254
+ let layout = " stereo"
265
255
let (filterGraph, bufferSrc, bufferSink) = createAudioFilterGraph(clip, sourceSr, layout)
266
256
defer :
267
257
if filterGraph != nil :
@@ -284,22 +274,13 @@ proc processAudioClip(clip: Clip, data: seq[seq[int16]], sourceSr: cint, targetS
284
274
if av_frame_get_buffer(inputFrame, 0 ) < 0 :
285
275
error " Could not allocate input audio frame buffer"
286
276
287
- # Copy input data to frame (planar format)
277
+ # Copy input data to frame (convert from interleaved to planar format)
288
278
for ch in 0 ..< channels:
289
279
let channelData = cast [ptr UncheckedArray[int16 ]](inputFrame.data[ch])
290
280
for i in 0 ..< samples:
291
- if ch == 0 :
292
- if i < data[0 ].len:
293
- channelData[i] = data[0 ][i]
294
- else :
295
- channelData[i] = 0
296
- elif ch == 1 :
297
- if actualChannels >= 2 and i < data[1 ].len:
298
- channelData[i] = data[1 ][i]
299
- elif i < data[0 ].len:
300
- channelData[i] = data[0 ][i]
301
- else :
302
- channelData[i] = 0
281
+ let srcIndex = i * 2 + ch # Interleaved index
282
+ if srcIndex < data.len:
283
+ channelData[i] = data[srcIndex]
303
284
else :
304
285
channelData[i] = 0
305
286
@@ -327,64 +308,61 @@ proc processAudioClip(clip: Clip, data: seq[seq[int16]], sourceSr: cint, targetS
327
308
328
309
outputFrames.add(outputFrame)
329
310
330
- # Convert output frames back to seq[seq[ int16] ]
311
+ # Convert output frames back to interleaved seq[int16]
331
312
if outputFrames.len == 0 :
332
- processedData = @ [newSeq[ int16 ]( 0 ), newSeq[ int16 ]( 0 ) ]
313
+ processedData = @ []
333
314
else :
334
315
var totalSamples = 0
335
316
for frame in outputFrames:
336
317
totalSamples += frame.nb_samples.int
337
318
338
- processedData = @ [ newSeq[int16 ](totalSamples), newSeq[ int16 ](totalSamples)]
319
+ processedData = newSeq[int16 ](totalSamples * 2 ) # Interleaved stereo
339
320
340
321
var sampleOffset = 0
341
322
for frame in outputFrames:
342
323
let frameSamples = frame.nb_samples.int
343
324
let frameChannels = min(frame.ch_layout.nb_channels.int , 2 )
344
325
345
326
if frame.format == AV_SAMPLE_FMT_S16P.cint :
346
- for ch in 0 ..< min(processedData.len, frameChannels):
347
- if frame.data[ch] != nil :
348
- let channelData = cast [ptr UncheckedArray[int16 ]](frame.data[ch])
349
- for i in 0 ..< frameSamples:
350
- if sampleOffset + i < processedData[ch].len:
351
- processedData[ch][sampleOffset + i] = channelData[i]
327
+ # Convert from planar to interleaved
328
+ for i in 0 ..< frameSamples:
329
+ let interleavedIndex = (sampleOffset + i) * 2
330
+ for ch in 0 ..< frameChannels:
331
+ if frame.data[ch] != nil and interleavedIndex + ch < processedData.len:
332
+ let channelData = cast [ptr UncheckedArray[int16 ]](frame.data[ch])
333
+ processedData[interleavedIndex + ch] = channelData[i]
352
334
elif frame.format == AV_SAMPLE_FMT_S16.cint :
335
+ # Already interleaved, just copy
353
336
let audioData = cast [ptr UncheckedArray[int16 ]](frame.data[0 ])
354
337
for i in 0 ..< frameSamples:
355
- for ch in 0 ..< min(processedData.len, frameChannels):
356
- if sampleOffset + i < processedData[ch].len:
357
- processedData[ch][sampleOffset + i] = audioData[i * frameChannels + ch]
338
+ let interleavedIndex = (sampleOffset + i) * 2
339
+ for ch in 0 ..< frameChannels:
340
+ if interleavedIndex + ch < processedData.len:
341
+ processedData[interleavedIndex + ch] = audioData[i * frameChannels + ch]
358
342
359
343
sampleOffset += frameSamples
360
344
361
- # Duplicate mono to stereo if needed
362
- if processedData.len >= 2 and processedData[ 0 ].len > 0 and processedData[ 1 ].len > 0 :
345
+ # Duplicate mono to stereo if needed (in interleaved format)
346
+ if processedData.len >= 2 :
363
347
var isSecondChannelEmpty = true
364
- for i in 0 ..< min(1000 , processedData[ 1 ] .len):
365
- if processedData[1 ][i ] != 0 :
348
+ for i in 0 ..< min(1000 , processedData.len div 2 ):
349
+ if processedData[i * 2 + 1 ] != 0 :
366
350
isSecondChannelEmpty = false
367
351
break
368
352
if isSecondChannelEmpty:
369
- copyMem(addr processedData[1 ][0 ], addr processedData[0 ][0 ], processedData[0 ].len * sizeof(int16 ))
353
+ for i in 0 ..< (processedData.len div 2 ):
354
+ processedData[i * 2 + 1 ] = processedData[i * 2 ]
370
355
371
356
# Now resample from source to target sample rate
372
357
if sourceSr == targetSr:
373
- # Convert from channel-separated to interleaved format
374
- let channels = min(processedData.len, 2 )
375
- let samples = if processedData.len > 0 : processedData[0 ].len else : 0
376
- result = newSeq[int16 ](samples * 2 ) # Always use stereo output
377
- for i in 0 ..< samples:
378
- result [i * 2 ] = if channels > 0 and i < processedData[0 ].len: processedData[0 ][i] else : 0
379
- result [i * 2 + 1 ] = if channels > 1 and i < processedData[1 ].len: processedData[1 ][i] else :
380
- (if channels > 0 and i < processedData[0 ].len: processedData[0 ][i] else : 0 )
381
- return result
358
+ # Data is already in interleaved format
359
+ return processedData
382
360
383
- if processedData.len == 0 or processedData[ 0 ].len == 0 :
361
+ if processedData.len == 0 :
384
362
return @ []
385
363
386
- let channels = processedData.len
387
- let samples = processedData[ 0 ] .len
364
+ let channels = 2 # Always stereo interleaved
365
+ let samples = processedData.len div 2
388
366
389
367
# Create resampler for this conversion
390
368
var resampler = newAudioResampler(AV_SAMPLE_FMT_S16P, if channels == 1 : " mono" else : " stereo" , targetSr.int )
@@ -406,12 +384,13 @@ proc processAudioClip(clip: Clip, data: seq[seq[int16]], sourceSr: cint, targetS
406
384
if av_frame_get_buffer(inputFrame, 0 ) < 0 :
407
385
error " Could not allocate input frame buffer for resampling"
408
386
409
- # Copy data to input frame
387
+ # Copy data to input frame (convert from interleaved to planar)
410
388
for ch in 0 ..< channels:
411
389
let channelData = cast [ptr UncheckedArray[int16 ]](inputFrame.data[ch])
412
390
for i in 0 ..< samples:
413
- if i < processedData[ch].len:
414
- channelData[i] = processedData[ch][i]
391
+ let srcIndex = i * 2 + ch # Interleaved index
392
+ if srcIndex < processedData.len:
393
+ channelData[i] = processedData[srcIndex]
415
394
else :
416
395
channelData[i] = 0
417
396
0 commit comments