Skip to content

Commit 5007256

Browse files
BraveYimeoer
authored and committed
feat(nydusify): support convert raw format of modctl artifact
Signed-off-by: Yang Kaiyong <[email protected]>
1 parent f5b2fe5 commit 5007256

File tree

5 files changed

+299
-36
lines changed

5 files changed

+299
-36
lines changed

contrib/nydusify/go.mod

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ require (
4040
github.com/AdaLogics/go-fuzz-headers v0.0.0-20230811130428-ced1acdcaa24 // indirect
4141
github.com/AdamKorcz/go-118-fuzz-build v0.0.0-20231105174938-2b5cbb29f3e2 // indirect
4242
github.com/BraveY/snapshotter-converter v0.0.5 // indirect
43-
github.com/CloudNativeAI/model-spec v0.0.2 // indirect
43+
github.com/CloudNativeAI/model-spec v0.0.5 // indirect
4444
github.com/Microsoft/go-winio v0.6.2 // indirect
4545
github.com/Microsoft/hcsshim v0.11.5 // indirect
4646
github.com/agiledragon/gomonkey/v2 v2.13.0 // indirect

contrib/nydusify/pkg/external/modctl/modctl.go

Lines changed: 75 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -18,25 +18,26 @@ import (
1818

1919
"github.com/pkg/errors"
2020

21+
modelspec "github.com/CloudNativeAI/model-spec/specs-go/v1"
2122
"github.com/dragonflyoss/nydus/contrib/nydusify/pkg/snapshotter/external/backend"
2223
ocispec "github.com/opencontainers/image-spec/specs-go/v1"
2324
)
2425

2526
const (
26-
BlobPath = "/content.v1/docker/registry/v2/blobs/%s/%s/%s/data"
27-
ReposPath = "/content.v1/docker/registry/v2/repositories"
28-
ManifestPath = "/_manifests/tags/%s/current/link"
29-
ModelWeightMediaType = "application/vnd.cnai.model.weight.v1.tar"
30-
ModelDatasetMediaType = "application/vnd.cnai.model.dataset.v1.tar"
27+
BlobPath = "/content.v1/docker/registry/v2/blobs/%s/%s/%s/data"
28+
ReposPath = "/content.v1/docker/registry/v2/repositories"
29+
ManifestPath = "/_manifests/tags/%s/current/link"
3130
)
3231

3332
const (
3433
DefaultFileChunkSize = "4MiB"
3534
)
3635

3736
var mediaTypeChunkSizeMap = map[string]string{
38-
ModelWeightMediaType: "64MiB",
39-
ModelDatasetMediaType: "64MiB",
37+
modelspec.MediaTypeModelWeight: "64MiB",
38+
modelspec.MediaTypeModelWeightRaw: "64MiB",
39+
modelspec.MediaTypeModelDataset: "64MiB",
40+
modelspec.MediaTypeModelDatasetRaw: "64MiB",
4041
}
4142

4243
var _ backend.Handler = &Handler{}
@@ -130,8 +131,10 @@ func setWeightChunkSize(chunkSize uint64) {
130131
chunkSizeStr := humanize.IBytes(chunkSize)
131132
// remove the space in chunkSizeStr: `16 MiB` -> `16MiB`
132133
chunkSizeStr = strings.ReplaceAll(chunkSizeStr, " ", "")
133-
mediaTypeChunkSizeMap[ModelWeightMediaType] = chunkSizeStr
134-
mediaTypeChunkSizeMap[ModelDatasetMediaType] = chunkSizeStr
134+
mediaTypeChunkSizeMap[modelspec.MediaTypeModelWeight] = chunkSizeStr
135+
mediaTypeChunkSizeMap[modelspec.MediaTypeModelWeightRaw] = chunkSizeStr
136+
mediaTypeChunkSizeMap[modelspec.MediaTypeModelDataset] = chunkSizeStr
137+
mediaTypeChunkSizeMap[modelspec.MediaTypeModelDatasetRaw] = chunkSizeStr
135138
}
136139

137140
func getChunkSizeByMediaType(mediaType string) string {
@@ -206,9 +209,29 @@ func (handler *Handler) Handle(_ context.Context, file backend.File) ([]backend.
206209
}
207210
defer f.Close()
208211

209-
files, err := readTarBlob(f)
212+
isTar, err := validateTarFile(f)
210213
if err != nil {
211-
return nil, errors.Wrap(err, "read blob failed")
214+
return nil, errors.Wrap(err, "validate tar file failed")
215+
}
216+
217+
var files []fileInfo
218+
if isTar {
219+
fs, err := readTarBlob(f)
220+
if err != nil {
221+
return nil, errors.Wrap(err, "read blob failed")
222+
}
223+
files = fs
224+
} else {
225+
fm, err := f.Stat()
226+
if err != nil {
227+
return nil, errors.Wrap(err, "stat file failed")
228+
}
229+
files = append(files, fileInfo{
230+
fm.Name(),
231+
uint32(fm.Mode()),
232+
uint64(fm.Size()),
233+
0,
234+
})
212235
}
213236

214237
chunkSizeInInt, err := humanize.ParseBytes(chunkSize)
@@ -381,3 +404,44 @@ func readTarBlob(r io.ReadSeeker) ([]fileInfo, error) {
381404
}
382405
return files, nil
383406
}
407+
408+
func readRawBlob(layer ocispec.Descriptor) ([]fileInfo, error) {
409+
if !strings.HasSuffix(layer.MediaType, "raw") {
410+
return nil, fmt.Errorf("invalid media type: %s", layer.MediaType)
411+
}
412+
413+
path, ok := layer.Annotations[filePathKey]
414+
if !ok || len(path) == 0 {
415+
return nil, fmt.Errorf("invalid file path")
416+
}
417+
418+
b, ok := layer.Annotations[modelspec.AnnotationFileMetadata]
419+
if !ok || len(b) == 0 {
420+
return nil, errors.Errorf("missing file metadata annotation")
421+
}
422+
423+
var fm modelspec.FileMetadata
424+
if err := json.Unmarshal([]byte(b), &fm); err != nil {
425+
return nil, errors.Wrap(err, "unmarshal file metadata failed")
426+
}
427+
file := fileInfo{
428+
name: path,
429+
mode: fm.Mode,
430+
size: uint64(fm.Size),
431+
offset: 0,
432+
}
433+
return []fileInfo{file}, nil
434+
}
435+
436+
// validateTarFile reports whether f looks like a tar archive by trying to
// parse its first tar header.
//
// It returns true when the first header parses, or when the tar reader hits
// io.EOF right away (so a zero-length file counts as an empty archive). Any
// other error from the tar reader is taken to mean "not a tar archive" and
// yields (false, nil).
//
// The read offset of f is moved back to the start before returning on every
// path, tar or not, so the caller can read the content from the beginning
// whatever the outcome. A non-nil error is returned only when that seek fails.
func validateTarFile(f *os.File) (bool, error) {
	_, probeErr := tar.NewReader(f).Next()
	isTar := probeErr == nil || probeErr == io.EOF

	// Probing consumes bytes from f (up to a full header block), so rewind
	// unconditionally rather than only on the tar path.
	if _, err := f.Seek(0, io.SeekStart); err != nil {
		return false, fmt.Errorf("reset file pointer failed: %w", err)
	}
	return isTar, nil
}

contrib/nydusify/pkg/external/modctl/modctl_test.go

Lines changed: 112 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ import (
2121
"github.com/stretchr/testify/mock"
2222
"github.com/stretchr/testify/require"
2323

24+
modelspec "github.com/CloudNativeAI/model-spec/specs-go/v1"
2425
pkgPvd "github.com/dragonflyoss/nydus/contrib/nydusify/pkg/provider"
2526
"github.com/dragonflyoss/nydus/contrib/nydusify/pkg/snapshotter/external/backend"
2627
"github.com/opencontainers/go-digest"
@@ -151,6 +152,92 @@ func TestReadTarBlob(t *testing.T) {
151152
})
152153
}
153154

155+
func TestReadRawBlob(t *testing.T) {
156+
layer := ocispec.Descriptor{
157+
MediaType: modelspec.MediaTypeModelDatasetRaw,
158+
Size: 100,
159+
}
160+
fm := modelspec.FileMetadata{
161+
Name: "test.raw",
162+
Mode: 0644,
163+
Size: 100,
164+
}
165+
b, err := json.Marshal(fm)
166+
require.NoError(t, err)
167+
layer.Annotations = map[string]string{
168+
filePathKey: "test.raw",
169+
modelspec.AnnotationFileMetadata: string(b),
170+
}
171+
files, err := readRawBlob(layer)
172+
assert.NoError(t, err)
173+
assert.Equal(t, []fileInfo{{
174+
name: "test.raw",
175+
mode: 0644,
176+
size: 100,
177+
offset: 0,
178+
}}, files)
179+
}
180+
181+
func TestValidateTar(t *testing.T) {
182+
t.Run("Normal case: valid tar file", func(t *testing.T) {
183+
tmpfile, err := os.CreateTemp("", "testvalid.tar")
184+
require.NoError(t, err)
185+
defer os.Remove(tmpfile.Name())
186+
tw := tar.NewWriter(tmpfile)
187+
err = tw.WriteHeader(&tar.Header{
188+
Name: "testfile.txt",
189+
Mode: 0600,
190+
Size: 13,
191+
})
192+
require.NoError(t, err)
193+
_, err = tw.Write([]byte("hello, world\n"))
194+
require.NoError(t, err)
195+
err = tw.Close()
196+
require.NoError(t, err)
197+
tmpfile.Close()
198+
199+
f, err := os.Open(tmpfile.Name())
200+
require.NoError(t, err)
201+
defer f.Close()
202+
203+
valid, err := validateTarFile(f)
204+
assert.NoError(t, err)
205+
assert.True(t, valid)
206+
})
207+
208+
t.Run("Normal case: invalid tar file", func(t *testing.T) {
209+
tmpfile, err := os.CreateTemp("", "testinvalid.tar")
210+
require.NoError(t, err)
211+
defer os.Remove(tmpfile.Name())
212+
_, err = tmpfile.Write([]byte("invalid tar content"))
213+
require.NoError(t, err)
214+
tmpfile.Close()
215+
216+
f, err := os.Open(tmpfile.Name())
217+
require.NoError(t, err)
218+
defer f.Close()
219+
220+
valid, err := validateTarFile(f)
221+
assert.NoError(t, err)
222+
assert.False(t, valid)
223+
})
224+
225+
t.Run("Empty tar file", func(t *testing.T) {
226+
tmpfile, err := os.CreateTemp("", "testempty.tar")
227+
require.NoError(t, err)
228+
os.Truncate(tmpfile.Name(), 0)
229+
tmpfile.Close()
230+
231+
f, err := os.Open(tmpfile.Name())
232+
require.NoError(t, err)
233+
defer f.Close()
234+
235+
valid, err := validateTarFile(f)
236+
assert.NoError(t, err)
237+
assert.True(t, valid)
238+
})
239+
}
240+
154241
func TestGetOption(t *testing.T) {
155242
t.Run("Valid srcRef", func(t *testing.T) {
156243
srcRef := "host/namespace/image:tag"
@@ -190,7 +277,7 @@ func TestHandle(t *testing.T) {
190277

191278
handler.blobsMap = make(map[string]blobInfo)
192279
handler.blobsMap["test_digest"] = blobInfo{
193-
mediaType: ModelWeightMediaType,
280+
mediaType: modelspec.MediaTypeModelWeight,
194281
}
195282
t.Run("Open file failure", func(t *testing.T) {
196283
file := backend.File{RelativePath: "test/test_digest/nonexistent-file"}
@@ -199,7 +286,7 @@ func TestHandle(t *testing.T) {
199286
assert.Contains(t, err.Error(), "open tar file failed")
200287
})
201288

202-
t.Run("Normal", func(t *testing.T) {
289+
t.Run("Normal tar file", func(t *testing.T) {
203290
os.MkdirAll("/tmp/test/test_digest/", 0755)
204291
testFile, err := os.CreateTemp("/tmp/test/test_digest/", "test_tar")
205292
assert.NoError(t, err)
@@ -222,6 +309,20 @@ func TestHandle(t *testing.T) {
222309
assert.Equal(t, 1, len(chunks))
223310
})
224311

312+
t.Run("Normal raw file", func(t *testing.T) {
313+
os.MkdirAll("/tmp/test/test_digest/", 0755)
314+
testFile, err := os.CreateTemp("/tmp/test/test_digest/", "test_raw")
315+
assert.NoError(t, err)
316+
defer testFile.Close()
317+
defer os.RemoveAll(testFile.Name())
318+
testFile.Write([]byte("test"))
319+
testFilePath := strings.TrimPrefix(testFile.Name(), "/tmp/")
320+
file := backend.File{RelativePath: testFilePath}
321+
chunks, err := handler.Handle(context.Background(), file)
322+
assert.NoError(t, err)
323+
assert.Equal(t, 1, len(chunks))
324+
})
325+
225326
}
226327

227328
func TestModctlBackend(t *testing.T) {
@@ -250,28 +351,28 @@ func TestConvertToBlobs(t *testing.T) {
250351
Layers: []ocispec.Descriptor{
251352
{
252353
Digest: digest.Digest("sha256:abc123"),
253-
MediaType: ModelWeightMediaType,
354+
MediaType: modelspec.MediaTypeModelWeight,
254355
Size: 100,
255356
},
256357
},
257358
}
258359
actualBlobs1 := convertToBlobs(manifestWithColon)
259360
assert.Equal(t, 1, len(actualBlobs1))
260-
assert.Equal(t, ModelWeightMediaType, actualBlobs1[0].Config.MediaType)
361+
assert.Equal(t, modelspec.MediaTypeModelWeight, actualBlobs1[0].Config.MediaType)
261362
assert.Equal(t, "abc123", actualBlobs1[0].Config.Digest)
262363

263364
manifestWithoutColon := &ocispec.Manifest{
264365
Layers: []ocispec.Descriptor{
265366
{
266367
Digest: digest.Digest("abc123"),
267-
MediaType: ModelDatasetMediaType,
368+
MediaType: modelspec.MediaTypeModelDataset,
268369
Size: 100,
269370
},
270371
},
271372
}
272373
actualBlobs2 := convertToBlobs(manifestWithoutColon)
273374
assert.Equal(t, 1, len(actualBlobs2))
274-
assert.Equal(t, ModelDatasetMediaType, actualBlobs2[0].Config.MediaType)
375+
assert.Equal(t, modelspec.MediaTypeModelDataset, actualBlobs2[0].Config.MediaType)
275376
assert.Equal(t, "abc123", actualBlobs2[0].Config.Digest)
276377
}
277378

@@ -297,7 +398,7 @@ func TestExtractManifest(t *testing.T) {
297398

298399
var m = ocispec.Manifest{
299400
Config: ocispec.Descriptor{
300-
MediaType: ModelWeightMediaType,
401+
MediaType: modelspec.MediaTypeModelWeight,
301402
Digest: "sha256:abc1234",
302403
Size: 10,
303404
},
@@ -336,16 +437,16 @@ func TestSetBlobsMap(t *testing.T) {
336437
func TestSetWeightChunkSize(t *testing.T) {
337438
setWeightChunkSize(0)
338439
expectedDefault := "64MiB"
339-
assert.Equal(t, expectedDefault, mediaTypeChunkSizeMap[ModelWeightMediaType], "Weight media type should be set to default value")
340-
assert.Equal(t, expectedDefault, mediaTypeChunkSizeMap[ModelDatasetMediaType], "Dataset media type should be set to default value")
440+
assert.Equal(t, expectedDefault, mediaTypeChunkSizeMap[modelspec.MediaTypeModelWeight], "Weight media type should be set to default value")
441+
assert.Equal(t, expectedDefault, mediaTypeChunkSizeMap[modelspec.MediaTypeModelDataset], "Dataset media type should be set to default value")
341442

342443
chunkSize := uint64(16 * 1024 * 1024)
343444
setWeightChunkSize(chunkSize)
344445
expectedNonDefault := humanize.IBytes(chunkSize)
345446
expectedNonDefault = strings.ReplaceAll(expectedNonDefault, " ", "")
346447

347-
assert.Equal(t, expectedNonDefault, mediaTypeChunkSizeMap[ModelWeightMediaType], "Weight media type should match the specified chunk size")
348-
assert.Equal(t, expectedNonDefault, mediaTypeChunkSizeMap[ModelDatasetMediaType], "Dataset media type should match the specified chunk size")
448+
assert.Equal(t, expectedNonDefault, mediaTypeChunkSizeMap[modelspec.MediaTypeModelWeight], "Weight media type should match the specified chunk size")
449+
assert.Equal(t, expectedNonDefault, mediaTypeChunkSizeMap[modelspec.MediaTypeModelDataset], "Dataset media type should match the specified chunk size")
349450
}
350451

351452
func TestNewHandler(t *testing.T) {

0 commit comments

Comments
 (0)