Something like this should do the trick, and should be memory-efficient compared to the other answers. I looked at github.com/udhos/equalfile
and it seemed a bit overkill to me. Before you call compare() here, you should do two os.Stat()
calls and compare file sizes for an early out fast path.
The reason to use this implementation over the other answers is that you don't want to hold the entirety of both files in memory if you don't have to. You can read a chunk from A and a chunk from B, compare them, and then continue with the next chunk, one buffer-load from each file at a time, until you are done. You just have to be careful, because a read may return fewer bytes than you asked for: you may read 50 bytes from A and then 60 bytes from B because one of the reads blocked for some reason.
This implementation assumes that a Read() call will not return N > 0 (some bytes read) together with a non-nil error. That is how os.File behaves, but other implementations of Read, such as net.TCPConn, may not behave this way.
import (
	"bytes"
	"errors"
	"io"
	"os"
)
// errNotSame is returned by compare when both files could be read but their
// contents are not byte-for-byte identical.
var errNotSame = errors.New("File contents are different")

// isEOF reports whether err is one of the two end-of-input errors that
// io.ReadFull returns: io.EOF (nothing was read) or io.ErrUnexpectedEOF
// (the input ended before the buffer was full).
func isEOF(err error) bool {
	return errors.Is(err, io.EOF) || errors.Is(err, io.ErrUnexpectedEOF)
}

// compare checks whether the files at paths p1 and p2 have identical contents.
//
// It returns nil when the contents match, errNotSame when they differ, and
// the underlying error when either file cannot be opened or read. The files
// are read in fixed-size chunks, so only two 8 KiB buffers are held in memory
// no matter how large the files are.
func compare(p1, p2 string) error {
	fh1, err := os.Open(p1)
	if err != nil {
		return err
	}
	defer fh1.Close()

	fh2, err := os.Open(p2)
	if err != nil {
		return err
	}
	defer fh2.Close()

	var buf1, buf2 [8192]byte
	for {
		// io.ReadFull keeps reading until the buffer is full or the input
		// ends, so a short read on one side cannot put the two chunks out
		// of step with each other.
		n1, err1 := io.ReadFull(fh1, buf1[:])
		n2, err2 := io.ReadFull(fh2, buf2[:])
		end1, end2 := isEOF(err1), isEOF(err2)

		// A genuine read failure is reported as such, even when the other
		// file has already reached its end, rather than as a content
		// mismatch.
		if err1 != nil && !end1 {
			return err1
		}
		if err2 != nil && !end2 {
			return err2
		}

		// Chunks of different length are never equal, so this also catches
		// one file being a strict prefix of the other.
		if !bytes.Equal(buf1[:n1], buf2[:n2]) {
			return errNotSame
		}

		// The chunks matched, so they have the same length; a chunk shorter
		// than the buffer means both files ended at the same offset.
		if end1 || end2 {
			return nil
		}
	}
}