Skip to content
Draft

[DNM] #5039

Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions cmd/kafka-consumer/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import (
"os/signal"
"runtime/debug"
"syscall"
"time"

"github.com/google/uuid"
"github.com/pingcap/log"
Expand Down Expand Up @@ -61,6 +62,9 @@ func main() {
flag.StringVar(&consumerOption.ca, "ca", "", "CA certificate path for Kafka SSL connection")
flag.StringVar(&consumerOption.cert, "cert", "", "Certificate path for Kafka SSL connection")
flag.StringVar(&consumerOption.key, "key", "", "Private key path for Kafka SSL connection")
flag.BoolVar(&consumerOption.enableSyncpoint, "enable-syncpoint", false, "enable periodic aligned syncpoint records in downstream")
flag.DurationVar(&consumerOption.syncpointInterval, "syncpoint-interval", 10*time.Minute, "interval used to align downstream syncpoint records")
flag.DurationVar(&consumerOption.syncpointRetention, "syncpoint-retention", 24*time.Hour, "retention used to clean old downstream syncpoint records")
flag.Parse()

err := logger.InitLogger(&logger.Config{
Expand Down
14 changes: 13 additions & 1 deletion cmd/kafka-consumer/option.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import (
"net/url"
"strconv"
"strings"
"time"

"github.com/pingcap/log"
"github.com/pingcap/ticdc/cmd/util"
Expand Down Expand Up @@ -56,6 +57,10 @@ type option struct {
upstreamTiDBDSN string

enableTableAcrossNodes bool

enableSyncpoint bool
syncpointInterval time.Duration
syncpointRetention time.Duration
}

func newOption() *option {
Expand Down Expand Up @@ -161,6 +166,10 @@ func (o *option) Adjust(upstreamURIStr string, configFile string) {
o.codecConfig.AvroEnableWatermark = true
}
o.enableTableAcrossNodes = putil.GetOrZero(replicaConfig.Scheduler.EnableTableAcrossNodes)
if o.enableSyncpoint && o.syncpointInterval <= 0 {
log.Panic("syncpoint interval must be positive when syncpoint is enabled",
zap.Duration("syncpointInterval", o.syncpointInterval))
}

log.Info("consumer option adjusted",
zap.String("address", strings.Join(o.address, ",")),
Expand All @@ -173,5 +182,8 @@ func (o *option) Adjust(upstreamURIStr string, configFile string) {
zap.Int("maxBatchSize", o.maxBatchSize),
zap.String("configFile", configFile),
zap.String("upstreamURI", upstreamURI.String()),
zap.String("downstreamURI", o.downstreamURI))
zap.String("downstreamURI", o.downstreamURI),
zap.Bool("enableSyncpoint", o.enableSyncpoint),
zap.Duration("syncpointInterval", o.syncpointInterval),
zap.Duration("syncpointRetention", o.syncpointRetention))
}
206 changes: 206 additions & 0 deletions cmd/kafka-consumer/syncpoint.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,206 @@
// Copyright 2026 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// See the License for the specific language governing permissions and
// limitations under the License.

package main

import (
"context"
"database/sql"
"fmt"
"net/url"
"strconv"
"time"

"github.com/pingcap/errors"
"github.com/pingcap/log"
commonType "github.com/pingcap/ticdc/pkg/common"
"github.com/pingcap/ticdc/pkg/config"
cerrors "github.com/pingcap/ticdc/pkg/errors"
"github.com/pingcap/ticdc/pkg/filter"
mysqlcfg "github.com/pingcap/ticdc/pkg/sink/mysql"
"go.uber.org/zap"
)

// consumerSyncpointTable is the name of the table, inside the
// filter.TiCDCSystemSchema database, that holds consumer syncpoint records.
const consumerSyncpointTable = "consumer_syncpoint_v1"

// consumerSyncpointStore is the storage abstraction used to record consumer
// syncpoints. mysqlConsumerSyncpointStore is the implementation in this file.
type consumerSyncpointStore interface {
// Init prepares the store and returns a uint64 timestamp. In the MySQL
// implementation this is the largest primary_ts already recorded for the
// consumer and topic, or 0 when there is none.
Init(ctx context.Context) (uint64, error)
// Write records a syncpoint for primaryTs.
Write(ctx context.Context, primaryTs uint64) error

Check failure on line 38 in cmd/kafka-consumer/syncpoint.go

View workflow job for this annotation

GitHub Actions / Check

ST1003: interface method parameter primaryTs should be primaryTS (staticcheck)
// Close releases whatever the store holds; the MySQL implementation closes
// its database handle.
Close() error
}

// consumerSyncpointStoreConfig carries the settings that
// newMySQLConsumerSyncpointStore needs to build a store.
type consumerSyncpointStoreConfig struct {
	// downstreamURI is the sink URI of the downstream database; it is parsed
	// and must use a MySQL-compatible scheme.
	downstreamURI string
	// consumerID and topic identify whose syncpoint records the store reads
	// and writes.
	consumerID, topic string
	// retention is copied into the store, where it bounds the age of the
	// records kept by Write.
	retention time.Duration
}

// mysqlConsumerSyncpointStore is the consumerSyncpointStore implementation
// backed by a database/sql handle to the downstream database.
type mysqlConsumerSyncpointStore struct {
	// db is the downstream database handle; Close tolerates it being nil.
	db *sql.DB
	// consumerID and topic are bound to every query the store issues, so one
	// table can hold records for several consumers and topics.
	consumerID, topic string
	// retention is the maximum age of records kept; Write only runs its
	// cleanup statement when this is positive.
	retention time.Duration
}

// newMySQLConsumerSyncpointStore connects to the downstream described by
// cfg.downstreamURI and returns a store bound to cfg.consumerID and cfg.topic.
//
// It returns an ErrSinkURIInvalid error when the URI cannot be parsed, an
// ErrInvalidReplicaConfig error when the URI scheme is not MySQL compatible,
// and the traced error from mysqlcfg.NewMysqlConfigAndDB when the connection
// cannot be set up.
func newMySQLConsumerSyncpointStore(
	ctx context.Context,
	cfg consumerSyncpointStoreConfig,
) (consumerSyncpointStore, error) {
	parsedURI, parseErr := url.Parse(cfg.downstreamURI)
	if parseErr != nil {
		return nil, cerrors.WrapError(cerrors.ErrSinkURIInvalid, parseErr)
	}
	if scheme := config.GetScheme(parsedURI); !config.IsMySQLCompatibleScheme(scheme) {
		return nil, cerrors.ErrInvalidReplicaConfig.FastGenByArgs(
			"consumer syncpoint requires a tidb or mysql downstream")
	}

	// The MySQL sink helper is keyed by a changefeed, so the consumer ID is
	// used as the changefeed name and paired with the default sink settings.
	feedID := commonType.NewChangeFeedIDWithName(cfg.consumerID, commonType.DefaultKeyspaceName)
	sinkSettings := config.GetDefaultReplicaConfig().Sink
	sinkSettings.TiDBSourceID = 1
	feedConfig := &config.ChangefeedConfig{
		ChangefeedID: feedID,
		SinkURI:      cfg.downstreamURI,
		SinkConfig:   sinkSettings,
	}

	// Only the database handle is kept; the returned MySQL config is unused.
	_, handle, openErr := mysqlcfg.NewMysqlConfigAndDB(ctx, feedID, parsedURI, feedConfig)
	if openErr != nil {
		return nil, errors.Trace(openErr)
	}

	store := &mysqlConsumerSyncpointStore{
		db:         handle,
		consumerID: cfg.consumerID,
		topic:      cfg.topic,
		retention:  cfg.retention,
	}
	return store, nil
}

// Init makes sure the syncpoint table exists and then returns the largest
// primary_ts recorded for this store's consumer ID and topic. It returns 0
// with a nil error when no record exists yet. Query and parse failures are
// wrapped in ErrMySQLTxnError.
func (s *mysqlConsumerSyncpointStore) Init(ctx context.Context) (uint64, error) {
if err := s.createTable(ctx); err != nil {
return 0, err
}
// primary_ts is a varchar column (see createTable), so the query casts it
// to UNSIGNED to order rows numerically instead of lexicographically.
query := fmt.Sprintf(

Check failure on line 92 in cmd/kafka-consumer/syncpoint.go

View workflow job for this annotation

GitHub Actions / Check

G201: SQL string formatting (gosec)
"SELECT primary_ts FROM %s.%s WHERE consumer_id = ? AND topic = ? ORDER BY CAST(primary_ts AS UNSIGNED) DESC LIMIT 1",
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

Using CAST(primary_ts AS UNSIGNED) in the ORDER BY clause prevents the database from using the index on the primary key (consumer_id, topic, primary_ts). If the column type is changed to BIGINT UNSIGNED (as suggested elsewhere), this cast can be removed, allowing for a much more efficient index-backed sort.

Suggested change
"SELECT primary_ts FROM %s.%s WHERE consumer_id = ? AND topic = ? ORDER BY CAST(primary_ts AS UNSIGNED) DESC LIMIT 1",
"SELECT primary_ts FROM %s.%s WHERE consumer_id = ? AND topic = ? ORDER BY primary_ts DESC LIMIT 1",

filter.TiCDCSystemSchema,
consumerSyncpointTable,
)
// The value is scanned as a string because Write stores primary_ts in
// decimal text form.
var primaryTs string

Check failure on line 97 in cmd/kafka-consumer/syncpoint.go

View workflow job for this annotation

GitHub Actions / Check

ST1003: var primaryTs should be primaryTS (staticcheck)
err := s.db.QueryRowContext(ctx, query, s.consumerID, s.topic).Scan(&primaryTs)
// No row means nothing has been recorded for this consumer and topic yet;
// that is reported as timestamp 0 rather than as an error.
if err == sql.ErrNoRows {

Check failure on line 99 in cmd/kafka-consumer/syncpoint.go

View workflow job for this annotation

GitHub Actions / Check

comparing with == will fail on wrapped errors. Use errors.Is to check for a specific error (errorlint)
return 0, nil
}
if err != nil {
return 0, cerrors.WrapError(cerrors.ErrMySQLTxnError, errors.Trace(err))
}
// Convert the stored decimal string back into the uint64 timestamp.
ts, err := strconv.ParseUint(primaryTs, 10, 64)
if err != nil {
return 0, cerrors.WrapError(cerrors.ErrMySQLTxnError, errors.Trace(err))
}
return ts, nil
}

// createTable creates the filter.TiCDCSystemSchema database and the
// consumerSyncpointTable table inside it when they do not exist yet. Both
// statements use IF NOT EXISTS. Failures are wrapped in ErrMySQLTxnError.
func (s *mysqlConsumerSyncpointStore) createTable(ctx context.Context) error {
// The schema has to exist before the table can be created in it.
createDatabaseQuery := fmt.Sprintf("CREATE DATABASE IF NOT EXISTS %s", filter.TiCDCSystemSchema)
if _, err := s.db.ExecContext(ctx, createDatabaseQuery); err != nil {
return cerrors.WrapError(cerrors.ErrMySQLTxnError, errors.Trace(err))
}
// Rows are keyed by (consumer_id, topic, primary_ts). created_at is indexed
// and is the column Write filters on when deleting old records.
createTableQuery := fmt.Sprintf(`CREATE TABLE IF NOT EXISTS %s.%s
(
ticdc_cluster_id varchar(255),
consumer_id varchar(255),
topic varchar(255),
primary_ts varchar(18),
secondary_ts varchar(18),
Comment on lines +122 to +123
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

The primary_ts and secondary_ts columns are defined as varchar(18). However, a 64-bit unsigned integer (TSO) can have up to 20 digits. Using varchar(18) may lead to truncation or insertion errors as TSOs grow over time. Additionally, storing numeric timestamps as strings makes sorting and range queries inefficient. It is recommended to use BIGINT UNSIGNED for these columns to ensure future-proofing and better performance.

Suggested change
primary_ts varchar(18),
secondary_ts varchar(18),
primary_ts BIGINT UNSIGNED,
secondary_ts BIGINT UNSIGNED,

created_at timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP,
INDEX (created_at),
PRIMARY KEY (consumer_id, topic, primary_ts)
);`, filter.TiCDCSystemSchema, consumerSyncpointTable)
if _, err := s.db.ExecContext(ctx, createTableQuery); err != nil {
return cerrors.WrapError(cerrors.ErrMySQLTxnError, errors.Trace(err))
}
return nil
}

// Write records a syncpoint for primaryTs inside a single transaction. In
// order, it:
//  1. reads @@tidb_current_ts as the secondary ts (a failure is logged and
//     tolerated),
//  2. inserts the (primary_ts, secondary_ts) row for this consumer and topic,
//  3. sets the global tidb_external_ts to the secondary ts when one was read,
//  4. deletes records older than s.retention when retention is positive
//     (a failure is only logged),
//  5. commits.
//
// Failures of begin, insert, a non-ignorable SET GLOBAL, or commit are
// returned wrapped in ErrMySQLTxnError. Any return before a successful commit
// rolls the transaction back.
func (s *mysqlConsumerSyncpointStore) Write(ctx context.Context, primaryTs uint64) error {

Check failure on line 134 in cmd/kafka-consumer/syncpoint.go

View workflow job for this annotation

GitHub Actions / Check

ST1003: method parameter primaryTs should be primaryTS (staticcheck)
tx, err := s.db.BeginTx(ctx, nil)
if err != nil {
return cerrors.WrapError(cerrors.ErrMySQLTxnError, errors.Trace(err))
}
// committed is only set after Commit succeeds, so every earlier return
// path (and a failed Commit) triggers the deferred rollback.
committed := false
defer func() {
if !committed {
if rollbackErr := tx.Rollback(); rollbackErr != nil {
log.Warn("rollback consumer syncpoint transaction failed", zap.Error(rollbackErr))
}
}
}()

// secondaryTs starts as "0" and is overwritten with the downstream's
// current ts, read through the transaction. If the read fails the write
// still proceeds, and gotSecondaryTs records that no ts was read.
secondaryTs := "0"

Check failure on line 148 in cmd/kafka-consumer/syncpoint.go

View workflow job for this annotation

GitHub Actions / Check

ST1003: var secondaryTs should be secondaryTS (staticcheck)
gotSecondaryTs := true
if err = tx.QueryRowContext(ctx, "select @@tidb_current_ts").Scan(&secondaryTs); err != nil {
gotSecondaryTs = false
log.Warn("get downstream tidb current ts failed, use zero secondary ts",
zap.Uint64("primaryTs", primaryTs), zap.Error(err))
}

// Only the schema and table names are formatted into the statement; all
// values are passed as bound parameters.
insertQuery := fmt.Sprintf(

Check failure on line 156 in cmd/kafka-consumer/syncpoint.go

View workflow job for this annotation

GitHub Actions / Check

G201: SQL string formatting (gosec)
"INSERT IGNORE INTO %s.%s (ticdc_cluster_id, consumer_id, topic, primary_ts, secondary_ts) VALUES (?, ?, ?, ?, ?)",
filter.TiCDCSystemSchema,
consumerSyncpointTable,
)
// primaryTs is stored in decimal text form to match the varchar column.
if _, err = tx.ExecContext(ctx, insertQuery,
config.GetGlobalServerConfig().ClusterID,
s.consumerID,
s.topic,
strconv.FormatUint(primaryTs, 10),
secondaryTs,
); err != nil {
return cerrors.WrapError(cerrors.ErrMySQLTxnError, errors.Trace(err))
}

// tidb_external_ts is only updated when a secondary ts was actually read.
// Errors that cerrors.IsSyncPointIgnoreError accepts are logged and
// skipped; any other error aborts the write.
if gotSecondaryTs {
setExternalTsQuery := fmt.Sprintf("SET GLOBAL tidb_external_ts = %s", secondaryTs)
if _, err = tx.ExecContext(ctx, setExternalTsQuery); err != nil {
if cerrors.IsSyncPointIgnoreError(err) {
log.Warn("set global external ts failed, ignore this error", zap.Error(err))
} else {
return cerrors.WrapError(cerrors.ErrMySQLTxnError, errors.Trace(err))
}
}
}

// Best-effort cleanup: remove this consumer/topic's records whose
// created_at is older than the retention window (truncated to whole
// seconds). A failure here is logged and does not stop the commit.
if s.retention > 0 {
cleanupQuery := fmt.Sprintf(

Check failure on line 183 in cmd/kafka-consumer/syncpoint.go

View workflow job for this annotation

GitHub Actions / Check

G201: SQL string formatting (gosec)
"DELETE IGNORE FROM %s.%s WHERE consumer_id = ? AND topic = ? AND created_at < (NOW() - INTERVAL %d SECOND)",
filter.TiCDCSystemSchema,
consumerSyncpointTable,
int64(s.retention.Seconds()),
)
if _, err = tx.ExecContext(ctx, cleanupQuery, s.consumerID, s.topic); err != nil {
log.Warn("cleanup stale consumer syncpoint records failed", zap.Error(err))
}
}
Comment on lines +182 to +192
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

The cleanup of stale syncpoint records is performed on every Write call within the same transaction. This is inefficient and can increase the latency of syncpoint writes, especially if the table contains many records. It is recommended to throttle this cleanup logic (e.g., run it only once per hour) to reduce the load on the downstream database, similar to the implementation in the standard MySQL sink.


if err = tx.Commit(); err != nil {
return cerrors.WrapError(cerrors.ErrMySQLTxnError, errors.Trace(err))
}
committed = true
return nil
}

// Close closes the underlying database handle and returns its error. Calling
// it on a nil store, or on a store without a handle, is a no-op that returns
// nil.
func (s *mysqlConsumerSyncpointStore) Close() error {
	if s != nil && s.db != nil {
		return s.db.Close()
	}
	return nil
}
49 changes: 49 additions & 0 deletions cmd/kafka-consumer/syncpoint_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
// Copyright 2026 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// See the License for the specific language governing permissions and
// limitations under the License.

package main

import (
"context"
"testing"

"github.com/DATA-DOG/go-sqlmock"
"github.com/stretchr/testify/require"
)

// TestMysqlConsumerSyncpointStoreWriteReadsCurrentTsInTxn checks the statement
// sequence Write issues when retention is unset: begin, read
// @@tidb_current_ts, insert the record with that value as secondary_ts, set
// tidb_external_ts to the same value, and commit.
func TestMysqlConsumerSyncpointStoreWriteReadsCurrentTsInTxn(t *testing.T) {
	t.Parallel()

	// The mock uses exact string matching, so these must equal the statements
	// Write builds character for character.
	const (
		currentTsQuery = "select @@tidb_current_ts"
		insertStmt     = "INSERT IGNORE INTO tidb_cdc.consumer_syncpoint_v1 (ticdc_cluster_id, consumer_id, topic, primary_ts, secondary_ts) VALUES (?, ?, ?, ?, ?)"
		externalTsStmt = "SET GLOBAL tidb_external_ts = 456"
	)

	conn, sqlMock, err := sqlmock.New(sqlmock.QueryMatcherOption(sqlmock.QueryMatcherEqual))
	require.NoError(t, err)
	defer conn.Close()

	// Expectations are registered in the order Write issues the statements.
	sqlMock.ExpectBegin()

	currentTsRows := sqlmock.NewRows([]string{"@@tidb_current_ts"}).AddRow("456")
	sqlMock.ExpectQuery(currentTsQuery).WillReturnRows(currentTsRows)

	sqlMock.ExpectExec(insertStmt).
		WithArgs("default", "consumer-1", "topic-1", "123", "456").
		WillReturnResult(sqlmock.NewResult(1, 1))

	sqlMock.ExpectExec(externalTsStmt).WillReturnResult(sqlmock.NewResult(1, 1))

	sqlMock.ExpectCommit()

	// retention is left at zero, so no cleanup statement is expected.
	syncpointStore := &mysqlConsumerSyncpointStore{
		db:         conn,
		consumerID: "consumer-1",
		topic:      "topic-1",
	}
	writeErr := syncpointStore.Write(context.Background(), 123)
	require.NoError(t, writeErr)
	require.NoError(t, sqlMock.ExpectationsWereMet())
}
Loading
Loading