Invalid memory address or nil pointer dereference error

invalid memory address or nil pointer dereference
SIGSEGV: segmentation violation code=0x1 addr=0x0 pc=0x14fb4be
upsert{
  query{
    v as var(func: eq(id,12)) # this one has 1 000 000 followers
  }
  mutation {
   delete{
    uid(v) <followers>  * .
   }
  }
}

followers has count and reverse index.

code at this line

@ibrahim ideas?

@BlankRain - wondering if you could share the dataset?

@BlankRain can you please show the stack trace? and the version on which you saw this panic.

Dgraph version   : v20.07.0-12-g681fe9116
Dgraph codename  : shuri-mod
Go version       : go1.14.1

@chewxy we use twitter data:

wget http://an.kaist.ac.kr/\~haewoon/release/twitter_social_graph/twitter_rv.tar.gz 

# split by line
split -l 54535107 twitter_rv.net

Then use the following Go code to convert the CSV data to RDF:

package main
import (
    "path/filepath"
    "os"
    "fmt"
	"flag"
	"bufio"
	"io"
	"strings"
)

// getFilelist walks the directory tree rooted at path and returns the paths of
// every non-directory entry it encounters, in the order filepath.Walk visits
// them. If the walk itself reports an error, that error is printed and the
// paths gathered so far are still returned.
func getFilelist(path string) []string {
	collected := make([]string, 0)

	// visit is invoked by filepath.Walk once per entry in the tree.
	visit := func(entry string, info os.FileInfo, walkErr error) error {
		switch {
		case info == nil:
			// No file info is available for this entry; hand the walk error
			// back so that filepath.Walk stops and returns it.
			return walkErr
		case info.IsDir():
			// Directories are traversed but never listed.
			return nil
		}
		collected = append(collected, entry)
		return nil
	}

	if walkErr := filepath.Walk(path, visit); walkErr != nil {
		fmt.Printf("filepath.Walk() returned %v\n", walkErr)
	}
	return collected
}

// rdf converts the two-column edge list stored in file f into RDF triples that
// are appended to f+".rdf", then reports the outcome on ch.
//
// Each input line must contain exactly two fields divided by separator; the
// fields are used as blank-node labels for the source and destination of a
// <followers> edge. The first time a node is seen, its <id> and <dgraph.type>
// triples are emitted as well. Lines that do not split into two fields are
// counted, reported on stdout and skipped.
//
// Exactly one message is always sent on ch, whether the conversion succeeds or
// fails, so a caller that counts completion messages can never block forever.
func rdf(f string, separator string, ch chan string) {
	if err := convertToRDF(f, separator); err != nil {
		fmt.Printf("converting %s failed: %v\n", f, err)
		ch <- f + ".rdf failed: " + err.Error()
		return
	}
	ch <- f + ".rdf is ok!"
}

// convertToRDF does the actual work of rdf and returns the first I/O error it
// encounters while reading f or writing f+".rdf".
func convertToRDF(f, separator string) (err error) {
	in, err := os.Open(f)
	if err != nil {
		return fmt.Errorf("open input: %w", err)
	}
	defer in.Close()

	out, err := os.OpenFile(f+".rdf", os.O_RDWR|os.O_CREATE|os.O_APPEND, 0644)
	if err != nil {
		return fmt.Errorf("open output: %w", err)
	}
	defer func() {
		// A failed Close can mean lost data, so surface it unless an earlier
		// error is already being returned.
		if cerr := out.Close(); err == nil && cerr != nil {
			err = fmt.Errorf("close output: %w", cerr)
		}
	}()

	reader := bufio.NewReader(in)
	writer := bufio.NewWriter(out)

	const nodeTpl = "_:v%s <id> \"%s\" . \n_:v%s <dgraph.type> \"twitter_user\" .\n"
	seen := make(map[string]bool)

	// emitNode writes the id/type triples the first time a node is seen.
	// bufio.Writer remembers the first write error and returns it from Flush,
	// so individual write results do not need to be checked here.
	emitNode := func(id string) {
		if seen[id] {
			return
		}
		seen[id] = true
		fmt.Fprintf(writer, nodeTpl, id, id, id)
	}

	errcount := 0
	for {
		str, readErr := reader.ReadString('\n')
		if readErr != nil && readErr != io.EOF {
			// A persistent read error would otherwise spin this loop forever.
			return fmt.Errorf("read input: %w", readErr)
		}

		// ReadString may return data together with io.EOF when the last line
		// has no trailing newline; that line must still be converted.
		if str != "" {
			fields := strings.Split(str, separator)
			if len(fields) != 2 {
				errcount++
				fmt.Printf("from now on find %d errors in %s\n", errcount, f)
			} else {
				// Strip line terminators, including the '\r' of CRLF input.
				src := strings.Trim(fields[0], "\r\n")
				dst := strings.Trim(fields[1], "\r\n")
				emitNode(src)
				emitNode(dst)
				fmt.Fprintf(writer, "_:v%s <followers> _:v%s . \n", src, dst)
			}
		}

		if readErr == io.EOF {
			break
		}
	}

	if flushErr := writer.Flush(); flushErr != nil {
		return fmt.Errorf("write output: %w", flushErr)
	}
	return nil
}

// main reads two positional arguments, a separator key ("s", "t" or "c") and
// a folder of CSV files, then converts every file found under that folder by
// running rdf on it in its own goroutine. It prints one completion message
// per file and finally "Over!".
func main() {
	flag.Parse()
	separatorKey, root := flag.Arg(0), flag.Arg(1)

	// Translate the one-letter key into the actual separator; any other key
	// leaves the separator empty and triggers the usage message below.
	var separator string
	switch separatorKey {
	case "s":
		separator = " "
	case "t":
		separator = "\t"
	case "c":
		separator = ","
	}

	if separator == "" {
		fmt.Printf("Please specify the separator!\n t for Tab \n s for Space \n c for comma \n such as: ./gocsv2rdf s ./test \n")
		return
	}
	if root == "" {
		fmt.Printf("Please specify the CSV folder!\n such as: ./gocsv2rdf s ./test \n")
		return
	}

	files := getFilelist(root)
	fmt.Printf("%v\n", files)

	count := len(files)
	if count == 0 {
		fmt.Printf("Please specify the correct CSV folder!\n")
		return
	}

	// The channel is buffered for one message per file, so no worker blocks
	// when it reports completion.
	ch := make(chan string, count)
	for _, name := range files {
		fmt.Printf("Runninng for %s\n", name)
		// Files are processed in parallel, which assumes there is enough
		// memory; call rdf(name, separator, ch) without "go" to process them
		// one at a time instead.
		go rdf(name, separator, ch)
	}

	// Wait for exactly one message per file.
	for received := 0; received < count; received++ {
		fmt.Println(<-ch)
	}
	fmt.Println("Over!")
}

then bulk load or live load into dgraph.

The schema is

id: int @index(int) .
followers: [uid]  .

type twitter_user {
    id
    followers
}


Hey @BlankRain, can you show me the complete stack trace of the crash? I’m trying to figure out what sequence of function calls led to this crash.

Hi ,
This is the stack trace .

@BlankRain It looks like you’re running a modified version of dgraph

Dgraph version   : v20.07.0-12-g681fe9116
Dgraph codename  : shuri-mod
Go version       : go1.14.1

Can you try running on a released version of dgraph? I don’t have the code you’re running and I cannot debug it :slight_smile: .

Hi
The code is based on the v20.07.0 release.

I modified the code to fix some bugs.
For example:
Bulk loader crashes during reduce phase - #26 by BlankRain
This is the git commit my build is based on:

commit 7431be0dce0ffc42e3ff3e31b39fcdd5505e2bb3
Author: Martin Martinez Rivera <martinmr@dgraph.io>
Date:   Mon Aug 3 11:54:25 2020 -0700

    add cluster lables to the jaeger containers (#5951) (#6009)

Thanks @BlankRain. So the call to posting() is failing because uidPosting is nil on line 273.

But it looks like we initialize it.uidPosting in init, which is called by iterate.

@BlankRain would you be able to share your dataset? We can try to reproduce it but I doubt if we can easily reproduce it on our end. If you can share your dataset, we can look at it to figure out what’s causing this.

@animesh2049 Do you see any reason pb.Posting can be nil here? The cluster is running in ludicrous mode.

I can share my dataset, but it may be too large to copy.
My dataset is based on the twitter data.
I can show you how to produce the data.

wget http://an.kaist.ac.kr/\~haewoon/release/twitter_social_graph/twitter_rv.tar.gz 

tar xvf twitter_rv.tar.gz 

split -l 54535107 twitter_rv.net

mkdir test

mv x* test

rm test/*.rdf
./gocsv2rdf t test

# then copy test/*.rdf out for bulk load and start the dgraph cluster

the source code of gocsv2rdf is here

package main
import (
    "path/filepath"
    "os"
    "fmt"
	"flag"
	"bufio"
	"io"
	"strings"
)

// getFilelist walks the directory tree rooted at path and returns the paths of
// every non-directory entry it encounters, in the order filepath.Walk visits
// them. If the walk itself reports an error, that error is printed and the
// paths gathered so far are still returned.
func getFilelist(path string) []string {
	collected := make([]string, 0)

	// visit is invoked by filepath.Walk once per entry in the tree.
	visit := func(entry string, info os.FileInfo, walkErr error) error {
		switch {
		case info == nil:
			// No file info is available for this entry; hand the walk error
			// back so that filepath.Walk stops and returns it.
			return walkErr
		case info.IsDir():
			// Directories are traversed but never listed.
			return nil
		}
		collected = append(collected, entry)
		return nil
	}

	if walkErr := filepath.Walk(path, visit); walkErr != nil {
		fmt.Printf("filepath.Walk() returned %v\n", walkErr)
	}
	return collected
}

// rdf converts the two-column edge list stored in file f into RDF triples that
// are appended to f+".rdf", then reports the outcome on ch.
//
// Each input line must contain exactly two fields divided by separator; the
// fields are used as blank-node labels for the source and destination of a
// <followers> edge. The first time a node is seen, its <id> and <dgraph.type>
// triples are emitted as well. Lines that do not split into two fields are
// counted, reported on stdout and skipped.
//
// Exactly one message is always sent on ch, whether the conversion succeeds or
// fails, so a caller that counts completion messages can never block forever.
func rdf(f string, separator string, ch chan string) {
	if err := convertToRDF(f, separator); err != nil {
		fmt.Printf("converting %s failed: %v\n", f, err)
		ch <- f + ".rdf failed: " + err.Error()
		return
	}
	ch <- f + ".rdf is ok!"
}

// convertToRDF does the actual work of rdf and returns the first I/O error it
// encounters while reading f or writing f+".rdf".
func convertToRDF(f, separator string) (err error) {
	in, err := os.Open(f)
	if err != nil {
		return fmt.Errorf("open input: %w", err)
	}
	defer in.Close()

	out, err := os.OpenFile(f+".rdf", os.O_RDWR|os.O_CREATE|os.O_APPEND, 0644)
	if err != nil {
		return fmt.Errorf("open output: %w", err)
	}
	defer func() {
		// A failed Close can mean lost data, so surface it unless an earlier
		// error is already being returned.
		if cerr := out.Close(); err == nil && cerr != nil {
			err = fmt.Errorf("close output: %w", cerr)
		}
	}()

	reader := bufio.NewReader(in)
	writer := bufio.NewWriter(out)

	const nodeTpl = "_:v%s <id> \"%s\" . \n_:v%s <dgraph.type> \"twitter_user\" .\n"
	seen := make(map[string]bool)

	// emitNode writes the id/type triples the first time a node is seen.
	// bufio.Writer remembers the first write error and returns it from Flush,
	// so individual write results do not need to be checked here.
	emitNode := func(id string) {
		if seen[id] {
			return
		}
		seen[id] = true
		fmt.Fprintf(writer, nodeTpl, id, id, id)
	}

	errcount := 0
	for {
		str, readErr := reader.ReadString('\n')
		if readErr != nil && readErr != io.EOF {
			// A persistent read error would otherwise spin this loop forever.
			return fmt.Errorf("read input: %w", readErr)
		}

		// ReadString may return data together with io.EOF when the last line
		// has no trailing newline; that line must still be converted.
		if str != "" {
			fields := strings.Split(str, separator)
			if len(fields) != 2 {
				errcount++
				fmt.Printf("from now on find %d errors in %s\n", errcount, f)
			} else {
				// Strip line terminators, including the '\r' of CRLF input.
				src := strings.Trim(fields[0], "\r\n")
				dst := strings.Trim(fields[1], "\r\n")
				emitNode(src)
				emitNode(dst)
				fmt.Fprintf(writer, "_:v%s <followers> _:v%s . \n", src, dst)
			}
		}

		if readErr == io.EOF {
			break
		}
	}

	if flushErr := writer.Flush(); flushErr != nil {
		return fmt.Errorf("write output: %w", flushErr)
	}
	return nil
}

// main reads two positional arguments, a separator key ("s", "t" or "c") and
// a folder of CSV files, then converts every file found under that folder by
// running rdf on it in its own goroutine. It prints one completion message
// per file and finally "Over!".
func main() {
	flag.Parse()
	separatorKey, root := flag.Arg(0), flag.Arg(1)

	// Translate the one-letter key into the actual separator; any other key
	// leaves the separator empty and triggers the usage message below.
	var separator string
	switch separatorKey {
	case "s":
		separator = " "
	case "t":
		separator = "\t"
	case "c":
		separator = ","
	}

	if separator == "" {
		fmt.Printf("Please specify the separator!\n t for Tab \n s for Space \n c for comma \n such as: ./gocsv2rdf s ./test \n")
		return
	}
	if root == "" {
		fmt.Printf("Please specify the CSV folder!\n such as: ./gocsv2rdf s ./test \n")
		return
	}

	files := getFilelist(root)
	fmt.Printf("%v\n", files)

	count := len(files)
	if count == 0 {
		fmt.Printf("Please specify the correct CSV folder!\n")
		return
	}

	// The channel is buffered for one message per file, so no worker blocks
	// when it reports completion.
	ch := make(chan string, count)
	for _, name := range files {
		fmt.Printf("Runninng for %s\n", name)
		// Files are processed in parallel, which assumes there is enough
		// memory; call rdf(name, separator, ch) without "go" to process them
		// one at a time instead.
		go rdf(name, separator, ch)
	}

	// Wait for exactly one message per file.
	for received := 0; received < count; received++ {
		fmt.Println(<-ch)
	}
	fmt.Println("Over!")
}

Build it with go build, then run it.
All of this may take 30 minutes or more.

upsert{
  query{
    v as var(func: eq(id,12)) # this one has 1 000 000 followers
  }
  mutation {
   delete{
    uid(v) <followers>  * .
   }
  }
}

I run some deletes in an upsert block.
The reason pb.Posting can be nil may be somewhere around here.

Thanks @BlankRain. I’ve accepted this as a bug. @animesh2049 will run some tests locally on your dataset and get back to you.

Hey @BlankRain I have pushed a change, commit id 7873f04087e671b5e45d87aaa1875680324a0f7b. Can you please apply this commit and see if you are still getting the error.

hi, which branch ?

you can just git checkout 7873f040

ok,thanks ,let me try

just post the related pr here: Initialize posting list in moveToNextPart by animesh2049 · Pull Request #6560 · dgraph-io/dgraph · GitHub

Hey @BlankRain are you still facing the issue ?

Sorry, we haven’t finished the test yet; our cluster is broken. I will let you know when we finish the test.

2 posts were split to a new topic: Tokenizer panics (type error)