Weird behavior when removing / updating a node reference in a single transaction

Moved from GitHub dgraph/5411

Posted by jostillmanns:

What version of Dgraph are you using?

v2.30.1

Have you tried reproducing the issue with the latest release?

yes

What is the hardware spec (RAM, OS)?

Steps to reproduce the issue (command/config used to run Dgraph).

run the following test:

package main

import (
	"context"
	"encoding/json"
	"fmt"
	"testing"

	"github.com/dgraph-io/dgo/v2"
	"github.com/dgraph-io/dgo/v2/protos/api"
	"github.com/stretchr/testify/require"
	"google.golang.org/grpc"
)

// Test_it_replaces_connection reproduces the behavior described in this
// report: inside a single transaction the parent's node.child reference is
// deleted and then set to a newly created child. After the commit the parent
// is queried twice (uid only, then uid plus node.name) and both raw JSON
// responses are printed so the resulting node.child edge can be inspected.
//
// The test needs a running Dgraph instance reachable at dgraphAddr() and
// relies on TearDown to clean up afterwards.
func Test_it_replaces_connection(t *testing.T) {
	conn, err := grpc.Dial(dgraphAddr(), grpc.WithInsecure())
	if err != nil {
		t.Fatalf("connect: %v", err)
	}
	// Release the gRPC connection when the test ends. Deferred first, so it
	// runs after TearDown and txn.Discard, which still need the connection.
	defer conn.Close()

	schema := `
type node {
  node.child: uid
  node.name: string
}

node.child: uid .
node.name: string .
`

	// Initial graph: a parent with a single child.
	in := map[string]interface{}{
		"uid":       "_:probe",
		"node.name": "parent",
		"node.child": map[string]interface{}{
			"node.name": "child",
		},
	}

	js, err := json.Marshal(in)
	require.NoError(t, err)

	dgraph := dgo.NewDgraphClient(api.NewDgraphClient(conn))
	defer TearDown(dgraph)

	err = dgraph.Alter(context.Background(), &api.Operation{Schema: schema})
	require.NoError(t, err)

	resp, err := dgraph.NewTxn().Mutate(context.Background(), &api.Mutation{SetJson: js, CommitNow: true})
	require.NoError(t, err)

	query := `
query {
  resp (func: uid(%s)) {
    uid
    node.name
    node.child {
      uid
      node.name
    }
  }
}
`

	// Sanity check: the parent and its original child were stored.
	queryResp, err := dgraph.NewReadOnlyTxn().Query(context.Background(), fmt.Sprintf(query, resp.Uids["probe"]))
	require.NoError(t, err)

	var out struct {
		Resp []map[string]interface{} `json:"resp"`
	}
	err = json.Unmarshal(queryResp.GetJson(), &out)
	require.NoError(t, err)

	require.Len(t, out.Resp, 1)
	require.Equal(t, "parent", out.Resp[0]["node.name"])
	require.Equal(t, "child", out.Resp[0]["node.child"].(map[string]interface{})["node.name"])

	// Step 1 of the workaround: delete the existing node.child reference.
	remove := map[string]interface{}{
		"uid":        out.Resp[0]["uid"],
		"node.child": nil,
	}

	js, err = json.Marshal(remove)
	require.NoError(t, err)

	// The delete and the following set share this one transaction.
	txn := dgraph.NewTxn()
	defer txn.Discard(context.Background())

	_, err = txn.Mutate(context.Background(), &api.Mutation{DeleteJson: js})
	require.NoError(t, err)

	queryResp, err = txn.Query(context.Background(), fmt.Sprintf(query, out.Resp[0]["uid"]))
	require.NoError(t, err)

	// Reset out.Resp to nil before decoding again, so that entries decoded
	// from the first response cannot carry over into the second one.
	err = json.Unmarshal([]byte(`{"resp": null}`), &out)
	require.NoError(t, err)

	err = json.Unmarshal(queryResp.GetJson(), &out)
	require.NoError(t, err)

	// Fail with a clear message instead of panicking on out.Resp[0] below
	// if the in-transaction query returned no parent.
	require.Len(t, out.Resp, 1)

	// Within the transaction the reference must be gone.
	_, ok := out.Resp[0]["node.child"]
	require.False(t, ok)

	// Step 2 of the workaround: point node.child at a brand-new child.
	update := map[string]interface{}{
		"uid":       out.Resp[0]["uid"],
		"node.name": "parent",
		"node.child": map[string]interface{}{
			"node.name": "child 2",
		},
	}

	js, err = json.Marshal(update)
	require.NoError(t, err)

	_, err = txn.Mutate(context.Background(), &api.Mutation{SetJson: js})
	require.NoError(t, err)

	err = txn.Commit(context.Background())
	require.NoError(t, err)

	// Same lookup as query, but requesting only the child's uid.
	query2 := `
query {
  resp (func: uid(%s)) {
	uid
	node.name
	node.child {
	  uid
	}
  }
}
`

	queryResp, err = dgraph.NewTxn().Query(context.Background(), fmt.Sprintf(query2, out.Resp[0]["uid"]))
	require.NoError(t, err)

	fmt.Println(string(queryResp.GetJson()))

	queryResp, err = dgraph.NewTxn().Query(context.Background(), fmt.Sprintf(query, out.Resp[0]["uid"]))
	require.NoError(t, err)

	fmt.Println(string(queryResp.GetJson()))
}

Expected behaviour and actual result.

In the test I take an existing node (node.name: parent) with a child node (node.name: child) and try to replace the child with a new node (node.name: child 2). Prior to v1.1.1 you could only replace a node reference by removing the old reference first, which is why we still had the workaround that is shown in the test in place. I also wrote the issue that led to the patch that allows simply replacing the reference without removing it first: #4136. Anyway, I think that the workaround should still work, but it leads to very weird behavior. I have two println statements in the test, which give the following output:

{"resp":[{"uid":"0x55769","node.name":"parent","node.child":{"uid":["0x55768","0x5576a"]}}]}
{"resp":[{"uid":"0x55769","node.name":"parent","node.child":{"uid":"0x55768","node.name":"child","uid":"0x5576a","node.name":"child 2"}}]}

As you can see, the node.child predicate now points to an array of uids, which violates the schema definition, but only if the uid is queried without any further predicate. If you also query other predicates, you get this weird JSON, which also shows the values of the old child node.