Skip to content
Open
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions go/cmd/dolt/cli/arg_parser_helpers.go
Original file line number Diff line number Diff line change
Expand Up @@ -320,6 +320,7 @@ func CreateDiffArgParser(isTableFunction bool) *argparser.ArgParser {
ap.SupportsString(FormatFlag, "r", "result output format", "How to format diff output. Valid values are tabular, sql, json. Defaults to tabular.")
ap.SupportsString(WhereParam, "", "column", "filters columns based on values in the diff. See {{.EmphasisLeft}}dolt diff --help{{.EmphasisRight}} for details.")
ap.SupportsInt(LimitParam, "", "record_count", "limits to the first N diffs.")
ap.SupportsString(FilterParam, "", "diff_type", "filters results based on the type of modification (added, modified, removed).")
ap.SupportsFlag(StagedFlag, "", "Show only the staged data changes.")
ap.SupportsFlag(CachedFlag, "c", "Synonym for --staged")
ap.SupportsFlag(MergeBase, "", "Uses merge base of the first commit and second commit (or HEAD if not supplied) as the first commit")
Expand Down
1 change: 1 addition & 0 deletions go/cmd/dolt/cli/flags.go
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@ const (
SummaryFlag = "summary"
WhereParam = "where"
LimitParam = "limit"
FilterParam = "filter"
MergeBase = "merge-base"
DiffMode = "diff-mode"
ReverseFlag = "reverse"
Expand Down
203 changes: 191 additions & 12 deletions go/cmd/dolt/commands/diff.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@ import (
"github.com/gocraft/dbr/v2"
"github.com/gocraft/dbr/v2/dialect"

eventsapi "github.com/dolthub/eventsapi_schema/dolt/services/eventsapi/v1alpha1"

"github.com/dolthub/dolt/go/cmd/dolt/cli"
"github.com/dolthub/dolt/go/cmd/dolt/errhand"
"github.com/dolthub/dolt/go/libraries/doltcore/diff"
Expand All @@ -42,7 +44,6 @@ import (
"github.com/dolthub/dolt/go/libraries/utils/argparser"
"github.com/dolthub/dolt/go/libraries/utils/iohelp"
"github.com/dolthub/dolt/go/libraries/utils/set"
eventsapi "github.com/dolthub/eventsapi_schema/dolt/services/eventsapi/v1alpha1"
)

type diffOutput int
Expand Down Expand Up @@ -86,6 +87,8 @@ The diffs displayed can be limited to show the first N by providing the paramete

To filter which data rows are displayed, use {{.EmphasisLeft}}--where <SQL expression>{{.EmphasisRight}}. Table column names in the filter expression must be prefixed with {{.EmphasisLeft}}from_{{.EmphasisRight}} or {{.EmphasisLeft}}to_{{.EmphasisRight}}, e.g. {{.EmphasisLeft}}to_COLUMN_NAME > 100{{.EmphasisRight}} or {{.EmphasisLeft}}from_COLUMN_NAME + to_COLUMN_NAME = 0{{.EmphasisRight}}.

To filter diff output by change type, use {{.EmphasisLeft}}--filter <type>{{.EmphasisRight}} where {{.EmphasisLeft}}<type>{{.EmphasisRight}} is one of {{.EmphasisLeft}}added{{.EmphasisRight}}, {{.EmphasisLeft}}modified{{.EmphasisRight}}, or {{.EmphasisLeft}}removed{{.EmphasisRight}}. The {{.EmphasisLeft}}added{{.EmphasisRight}} filter shows only additions (new tables or rows), {{.EmphasisLeft}}modified{{.EmphasisRight}} shows only modifications (schema changes, renames, or row updates), and {{.EmphasisLeft}}removed{{.EmphasisRight}} shows only deletions (dropped tables or deleted rows). For example, {{.EmphasisLeft}}dolt diff --filter=removed{{.EmphasisRight}} shows only deleted rows and dropped tables.

The {{.EmphasisLeft}}--diff-mode{{.EmphasisRight}} argument controls how modified rows are presented when the format output is set to {{.EmphasisLeft}}tabular{{.EmphasisRight}}. When set to {{.EmphasisLeft}}row{{.EmphasisRight}}, modified rows are presented as old and new rows. When set to {{.EmphasisLeft}}line{{.EmphasisRight}}, modified rows are presented as a single row, and changes are presented using "+" and "-" within the column. When set to {{.EmphasisLeft}}in-place{{.EmphasisRight}}, modified rows are presented as a single row, and changes are presented side-by-side with a color distinction (requires a color-enabled terminal). When set to {{.EmphasisLeft}}context{{.EmphasisRight}}, rows that contain at least one column that spans multiple lines uses {{.EmphasisLeft}}line{{.EmphasisRight}}, while all other rows use {{.EmphasisLeft}}row{{.EmphasisRight}}. The default value is {{.EmphasisLeft}}context{{.EmphasisRight}}.
`,
Synopsis: []string{
Expand All @@ -102,6 +105,7 @@ type diffDisplaySettings struct {
where string
skinny bool
includeCols []string
filter *diffTypeFilter
}

type diffDatasets struct {
Expand Down Expand Up @@ -130,6 +134,133 @@ type diffStatistics struct {
NewCellCount uint64
}

// diffTypeFilter records which diff types a diff command should display.
//
// A filter whose filters map is nil or empty places no restriction on the
// output: every diff type is shown.
type diffTypeFilter struct {
	// filters is keyed by diff type; a true value means that type is shown.
	// Leaving it nil (or empty) selects all types.
	filters map[string]bool
}

// newDiffTypeFilter builds a filter that selects the single diff type named by
// filterType. An empty string or diff.DiffTypeAll produces a filter with a nil
// map, which includes every type. The value is not validated here; call
// isValid on the result to check it.
func newDiffTypeFilter(filterType string) *diffTypeFilter {
	includeAll := filterType == "" || filterType == diff.DiffTypeAll
	if includeAll {
		// The zero value leaves filters nil, which means "include all".
		return &diffTypeFilter{}
	}

	selected := make(map[string]bool, 1)
	selected[filterType] = true
	return &diffTypeFilter{filters: selected}
}

// shouldInclude reports whether entries of the given diff type should be
// shown. Table-level callers pass TableDeltaSummary.DiffType; row-level callers
// pass the diff type derived from the row's change type.
//
// A nil receiver, or a filter whose map is nil or empty, places no restriction
// on the output and therefore includes every type.
func (df *diffTypeFilter) shouldInclude(diffType string) bool {
	// len of a nil map is 0, so one length check covers both nil and empty.
	if df == nil || len(df.filters) == 0 {
		return true
	}

	// A type missing from the map yields false, i.e. it is excluded.
	return df.filters[diffType]
}

// isValid reports whether every diff type named in the filter is one of
// diff.DiffTypeAdded, diff.DiffTypeModified or diff.DiffTypeRemoved. A filter
// with no entries (nil or empty map) is valid.
func (df *diffTypeFilter) isValid() bool {
	// Ranging over a nil map performs no iterations, so the "include all"
	// filter falls straight through to the final return.
	for name := range df.filters {
		switch name {
		case diff.DiffTypeAdded, diff.DiffTypeModified, diff.DiffTypeRemoved:
			// recognised type
		default:
			return false
		}
	}
	return true
}

// shouldSkipRow reports whether a row with the given change type must be left
// out of the output under filter. The row's ChangeType is translated to a diff
// type string with diff.ChangeTypeToDiffType and then checked with
// shouldInclude, so rows and tables are filtered by the same rule.
func shouldSkipRow(filter *diffTypeFilter, rowChangeType diff.ChangeType) bool {
	// No filter means nothing is skipped. diff.None is never skipped either:
	// it stands for "no row" on one side of the diff, not a change to filter.
	if filter == nil || rowChangeType == diff.None {
		return false
	}

	return !filter.shouldInclude(diff.ChangeTypeToDiffType(rowChangeType))
}

// shouldUseLazyHeader reports whether printing of the table header should be
// postponed until a row is actually written. That is the case only when a diff
// type filter is active and the table has neither a schema change nor a
// rename, i.e. when filtering could leave the table with no rows to show and
// the header would otherwise be printed on its own.
func shouldUseLazyHeader(dArgs *diffArgs, tableSummary diff.TableDeltaSummary) bool {
	// Without an active filter no row can be filtered out.
	if dArgs.filter == nil || dArgs.filter.filters == nil {
		return false
	}

	// Schema changes always need the header up front.
	if tableSummary.SchemaChange {
		return false
	}

	// So do renames.
	return !tableSummary.IsRename()
}

// lazyRowWriter wraps a SqlRowDiffWriter and delays calling BeginTable
// until the first row is actually written. This prevents empty table headers
// when all rows are filtered out.
type lazyRowWriter struct {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How is this looking, @elianddb?

Also, instead of keeping a separate `initialized` flag, I just added a nil check on `onFirstWrite`.

writer diff.SqlRowDiffWriter

// Callback to invoke before first write
// Set to nil after first call
onFirstWrite func() error
}

// newLazyRowWriter wraps writer in a lazyRowWriter. onFirstWrite is stored on
// the wrapper and run by its write methods before the first row is forwarded
// to writer; once it has succeeded it is not run again.
func newLazyRowWriter(writer diff.SqlRowDiffWriter, onFirstWrite func() error) *lazyRowWriter {
	lazy := new(lazyRowWriter)
	lazy.writer = writer
	lazy.onFirstWrite = onFirstWrite
	return lazy
}

// ensureStarted runs the pending onFirstWrite callback, if there is one, and
// clears it once it has succeeded so that later writes skip it. If the
// callback returns an error, the error is returned and the callback is left in
// place.
func (l *lazyRowWriter) ensureStarted() error {
	if l.onFirstWrite == nil {
		return nil
	}
	if err := l.onFirstWrite(); err != nil {
		return err
	}
	l.onFirstWrite = nil // prevent double-initialization
	return nil
}

// WriteRow implements diff.SqlRowDiffWriter. It runs the deferred first-write
// callback if it is still pending, then forwards the row to the wrapped
// writer. An error from the callback is returned without writing the row.
func (l *lazyRowWriter) WriteRow(ctx *sql.Context, row sql.Row, diffType diff.ChangeType, colDiffTypes []diff.ChangeType) error {
	if err := l.ensureStarted(); err != nil {
		return err
	}

	return l.writer.WriteRow(ctx, row, diffType, colDiffTypes)
}

// WriteCombinedRow implements diff.SqlRowDiffWriter. It runs the deferred
// first-write callback if it is still pending, then forwards the old/new row
// pair to the wrapped writer. An error from the callback is returned without
// writing the rows.
func (l *lazyRowWriter) WriteCombinedRow(ctx *sql.Context, oldRow, newRow sql.Row, mode diff.Mode) error {
	if err := l.ensureStarted(); err != nil {
		return err
	}

	return l.writer.WriteCombinedRow(ctx, oldRow, newRow, mode)
}

// Close implements diff.SqlRowDiffWriter by closing the wrapped writer and
// returning its error. It does not run onFirstWrite, so if no row was ever
// written the deferred callback is never invoked.
// NOTE(review): the wrapped writer is still closed in that case — confirm that
// closing a writer whose table was never begun produces no stray output.
func (l *lazyRowWriter) Close(ctx context.Context) error {
return l.writer.Close(ctx)
}

type DiffCmd struct{}

// Name returns the name of the Dolt cli command. This is what is used on the command line to invoke the command
Expand Down Expand Up @@ -220,6 +351,15 @@ func (cmd DiffCmd) validateArgs(apr *argparser.ArgParseResults) errhand.VerboseE
return errhand.BuildDError("invalid output format: %s", f).Build()
}

filterValue, hasFilter := apr.GetValue(cli.FilterParam)
if hasFilter {
filter := newDiffTypeFilter(filterValue)
if !filter.isValid() {
return errhand.BuildDError("invalid filter: %s. Valid values are: %s, %s, %s",
filterValue, diff.DiffTypeAdded, diff.DiffTypeModified, diff.DiffTypeRemoved).Build()
}
}

return nil
}

Expand Down Expand Up @@ -268,6 +408,9 @@ func parseDiffDisplaySettings(apr *argparser.ArgParseResults) *diffDisplaySettin
displaySettings.limit, _ = apr.GetInt(cli.LimitParam)
displaySettings.where = apr.GetValueOrDefault(cli.WhereParam, "")

filterValue := apr.GetValueOrDefault(cli.FilterParam, diff.DiffTypeAll)
displaySettings.filter = newDiffTypeFilter(filterValue)

return displaySettings
}

Expand Down Expand Up @@ -670,13 +813,13 @@ func getSchemaDiffSummariesBetweenRefs(queryist cli.Queryist, sqlCtx *sql.Contex
tableName = fromTable
}
case fromTable == "":
diffType = "added"
diffType = diff.DiffTypeAdded
tableName = toTable
case toTable == "":
diffType = "dropped"
diffType = diff.DiffTypeRemoved
tableName = fromTable
case fromTable != "" && toTable != "" && fromTable != toTable:
diffType = "renamed"
diffType = diff.DiffTypeModified // Renamed tables are treated as modified
tableName = toTable
default:
return nil, fmt.Errorf("error: unexpected schema diff case: fromTable='%s', toTable='%s'", fromTable, toTable)
Expand Down Expand Up @@ -738,14 +881,17 @@ func getDiffSummariesBetweenRefs(queryist cli.Queryist, sqlCtx *sql.Context, fro
}

switch summary.DiffType {
case "dropped":
case diff.DiffTypeRemoved:
summary.TableName = summary.FromTableName
case "added":
case diff.DiffTypeAdded:
summary.TableName = summary.ToTableName
case "renamed":
summary.TableName = summary.ToTableName
case "modified":
summary.TableName = summary.FromTableName
case diff.DiffTypeModified:
// For renamed tables, use ToTableName; for other modifications, use FromTableName
if summary.FromTableName.Name != summary.ToTableName.Name {
summary.TableName = summary.ToTableName
} else {
summary.TableName = summary.FromTableName
}
default:
return nil, fmt.Errorf("error: unexpected diff type '%s'", summary.DiffType)
}
Expand Down Expand Up @@ -816,6 +962,16 @@ func diffUserTables(queryist cli.Queryist, sqlCtx *sql.Context, dArgs *diffArgs)
continue
}

// Apply table-level filtering based on diff type
if dArgs.filter != nil && dArgs.filter.filters != nil {
// For data-only changes (no schema/rename), always let them through for row-level filtering
isDataOnlyChange := !delta.SchemaChange && !delta.IsRename() && delta.DataChange

if !isDataOnlyChange && !dArgs.filter.shouldInclude(delta.DiffType) {
continue // Skip this table
}
}

if strings.HasPrefix(delta.ToTableName.Name, diff.DBPrefix) {
verr := diffDatabase(queryist, sqlCtx, delta, dArgs, dw)
if verr != nil {
Expand Down Expand Up @@ -1110,7 +1266,7 @@ func diffUserTable(
fromTable := tableSummary.FromTableName
toTable := tableSummary.ToTableName

if dArgs.diffParts&NameOnlyDiff == 0 {
if dArgs.diffParts&NameOnlyDiff == 0 && !shouldUseLazyHeader(dArgs, tableSummary) {
// TODO: schema names
err := dw.BeginTable(tableSummary.FromTableName.Name, tableSummary.ToTableName.Name, tableSummary.IsAdd(), tableSummary.IsDrop())
if err != nil {
Expand Down Expand Up @@ -1446,11 +1602,27 @@ func diffRows(
}

// We always instantiate a RowWriter in case the diffWriter needs it to close off any work from schema output
rowWriter, err := dw.RowWriter(fromTableInfo, toTableInfo, tableSummary, unionSch)
var rowWriter diff.SqlRowDiffWriter
realWriter, err := dw.RowWriter(fromTableInfo, toTableInfo, tableSummary, unionSch)
if err != nil {
return errhand.VerboseErrorFromError(err)
}

if shouldUseLazyHeader(dArgs, tableSummary) {
// Wrap with lazy writer to delay BeginTable until first row write
onFirstWrite := func() error {
return dw.BeginTable(
tableSummary.FromTableName.Name,
tableSummary.ToTableName.Name,
tableSummary.IsAdd(),
tableSummary.IsDrop(),
)
}
rowWriter = newLazyRowWriter(realWriter, onFirstWrite)
} else {
rowWriter = realWriter
}

// can't diff
if !diffable {
// TODO: this messes up some structured output if the user didn't redirect it
Expand Down Expand Up @@ -1708,6 +1880,13 @@ func writeDiffResults(
return err
}

// Apply row-level filtering based on diff type
if dArgs.filter != nil {
if shouldSkipRow(dArgs.filter, oldRow.RowDiff) || shouldSkipRow(dArgs.filter, newRow.RowDiff) {
continue
}
}

if dArgs.skinny {
var filteredOldRow, filteredNewRow diff.RowDiff
for i, changeType := range newRow.ColDiffs {
Expand Down
Loading
Loading