Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 28 additions & 1 deletion go/cmd/exporter/writer.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ package main

import (
"context"
"errors"
"hash/crc32"
"log/slog"
"os"
"path/filepath"
Expand All @@ -11,6 +13,9 @@ import (
"github.com/google/osv.dev/go/osv/clients"
)

// crc32cTable uses the Castagnoli polynomial, matching GCS's own checksum algorithm.
var crc32cTable = crc32.MakeTable(crc32.Castagnoli)

// writeMsg holds the data for a file to be written.
type writeMsg struct {
path string
Expand All @@ -31,7 +36,10 @@ func writer(ctx context.Context, cancel context.CancelFunc, inCh <-chan writeMsg
}
path := filepath.Join(pathPrefix, msg.path)
if client != nil {
// Write to the bucket.
// Skip the upload if the object already has the same content.
if gcsContentUnchanged(ctx, client, path, msg.data) {
break
}
err := client.WriteObject(ctx, path, msg.data, &clients.WriteOptions{
ContentType: msg.mimeType,
})
Expand Down Expand Up @@ -62,3 +70,22 @@ func writer(ctx context.Context, cancel context.CancelFunc, inCh <-chan writeMsg
}
}
}

// gcsContentUnchanged returns true if the object at path already has the same
// CRC32C checksum as data, meaning the upload would be a no-op. Any error
// reading the object's attributes (other than ErrNotFound) is logged and
// treated as "content changed" so the upload proceeds.
func gcsContentUnchanged(ctx context.Context, client clients.CloudStorage, path string, data []byte) bool {
attrs, err := client.ReadObjectAttrs(ctx, path)
if err != nil {
if !errors.Is(err, clients.ErrNotFound) {
logger.WarnContext(ctx, "failed to read object attrs, proceeding with upload", slog.String("path", path), slog.Any("err", err))
}
return false
}
if attrs.CRC32C == crc32.Checksum(data, crc32cTable) {
logger.InfoContext(ctx, "skipping upload, content unchanged", slog.String("path", path))
return true
}
return false
}
134 changes: 134 additions & 0 deletions go/cmd/exporter/writer_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
package main

import (
"context"
"path/filepath"
"sync"
"testing"

"github.com/google/osv.dev/go/osv/clients"
"github.com/google/osv.dev/go/testutils"
)

// runWriter sends msgs to a writer goroutine and waits for it to finish.
func runWriter(t *testing.T, storage clients.CloudStorage, pathPrefix string, msgs []writeMsg) {
t.Helper()
ctx, cancel := context.WithCancel(context.Background())
defer cancel()

inCh := make(chan writeMsg, len(msgs))
for _, m := range msgs {
inCh <- m
}
close(inCh)

var wg sync.WaitGroup
wg.Add(1)
go writer(ctx, cancel, inCh, storage, pathPrefix, &wg)
wg.Wait()
}

func TestWriter_GCS_SkipsUnchangedContent(t *testing.T) {
storage := testutils.NewMockStorage()
data := []byte(`{"id":"OSV-1"}`)

// Pre-populate using the same path the writer will compute.
objPath := filepath.Join("out", "OSV-1.json")
if err := storage.WriteObject(t.Context(), objPath, data, nil); err != nil {
t.Fatalf("setup: %v", err)
}
attrsBefore, _ := storage.ReadObjectAttrs(t.Context(), objPath)

runWriter(t, storage, "out", []writeMsg{
{path: "OSV-1.json", mimeType: "application/json", data: data},
})

attrsAfter, err := storage.ReadObjectAttrs(t.Context(), objPath)
if err != nil {
t.Fatalf("ReadObjectAttrs: %v", err)
}
if attrsAfter.Generation != attrsBefore.Generation {
t.Errorf("expected generation %d (skipped), got %d", attrsBefore.Generation, attrsAfter.Generation)
}
}

func TestWriter_GCS_UploadsChangedContent(t *testing.T) {
storage := testutils.NewMockStorage()

objPath := filepath.Join("out", "OSV-1.json")
if err := storage.WriteObject(t.Context(), objPath, []byte(`{"id":"OSV-1","old":true}`), nil); err != nil {
t.Fatalf("setup: %v", err)
}
attrsBefore, _ := storage.ReadObjectAttrs(t.Context(), objPath)

runWriter(t, storage, "out", []writeMsg{
{path: "OSV-1.json", mimeType: "application/json", data: []byte(`{"id":"OSV-1","old":false}`)},
})

attrsAfter, err := storage.ReadObjectAttrs(t.Context(), objPath)
if err != nil {
t.Fatalf("ReadObjectAttrs: %v", err)
}
if attrsAfter.Generation <= attrsBefore.Generation {
t.Errorf("expected generation > %d (uploaded), got %d", attrsBefore.Generation, attrsAfter.Generation)
}
}

func TestWriter_GCS_UploadsNewObject(t *testing.T) {
storage := testutils.NewMockStorage()

runWriter(t, storage, "out", []writeMsg{
{path: "OSV-1.json", mimeType: "application/json", data: []byte(`{"id":"OSV-1"}`)},
})

objPath := filepath.Join("out", "OSV-1.json")
attrs, err := storage.ReadObjectAttrs(t.Context(), objPath)
if err != nil {
t.Fatalf("expected object to exist after upload: %v", err)
}
if attrs.Generation != 1 {
t.Errorf("expected generation 1 for new object, got %d", attrs.Generation)
}
}

func TestWriter_GCS_SkipsMultipleUnchanged(t *testing.T) {
storage := testutils.NewMockStorage()

type entry struct {
msgPath string
objPath string
data []byte
}
entries := []entry{
{"A.json", filepath.Join("out", "A.json"), []byte(`{"id":"A"}`)},
{"B.json", filepath.Join("out", "B.json"), []byte(`{"id":"B"}`)},
{"C.json", filepath.Join("out", "C.json"), []byte(`{"id":"C"}`)},
}
for _, e := range entries {
if err := storage.WriteObject(t.Context(), e.objPath, e.data, nil); err != nil {
t.Fatalf("setup %s: %v", e.objPath, err)
}
}

gensBefore := make(map[string]int64)
for _, e := range entries {
attrs, _ := storage.ReadObjectAttrs(t.Context(), e.objPath)
gensBefore[e.objPath] = attrs.Generation
}

msgs := make([]writeMsg, len(entries))
for i, e := range entries {
msgs[i] = writeMsg{path: e.msgPath, mimeType: "application/json", data: e.data}
}
runWriter(t, storage, "out", msgs)

for _, e := range entries {
attrs, err := storage.ReadObjectAttrs(t.Context(), e.objPath)
if err != nil {
t.Fatalf("ReadObjectAttrs(%s): %v", e.objPath, err)
}
if attrs.Generation != gensBefore[e.objPath] {
t.Errorf("%s: expected generation %d (skipped), got %d", e.objPath, gensBefore[e.objPath], attrs.Generation)
}
}
}
Loading