Quantcast
Channel: AFPy's Planet
Viewing all articles
Browse latest Browse all 3409

[hautefeuille] Recherche de fichiers doublons avec Golang

$
0
0

On utilise des fonctions de hachage pour calculer et comparer les empreintes des fichiers. Les fichiers dupliqués sont déplacés dans un répertoire spécifique.

On lance le programme :

./main -s test -m sha1 -d dupl 
  • s est le répertoire source,
  • m est la méthode de hachage,
  • d est le répertoire contenant les fichiers dupliqués.

Le code du programme est disponible sur ma page Github.

    package main

    import (
        "fmt"
        "os"
        "io"
        "path/filepath"
        "github.com/kr/fs"
        "crypto/sha1"
        "crypto/md5"
        "flag"
        "math"
        "hash"
    )

    const filechunk = 8192

    func hasher(filename string, method string) string {
        file, err := os.Open(filename)
        if err != nil {
            panic(err.Error())
        }
        defer file.Close()
        info, _ := file.Stat()
        filesize := info.Size()
        blocks := uint64(math.Ceil(float64(filesize) / float64(filechunk)))
        var h hash.Hash
        switch method {
        case "sha1":
            h = sha1.New()
        case "md5":
            h = md5.New()
        default:
            h = sha1.New()
        }
        for i := uint64(0); i < blocks; i++ {
            blocksize := int(math.Min(filechunk, float64(filesize-int64(i*filechunk))))
            buf := make([] byte, blocksize)
            file.Read(buf)
            io.WriteString(h, string(buf)) //append to the hash
        }
        return fmt.Sprintf("%X", h.Sum(nil))
    }

    // marcher walks dir and moves duplicate files into dest.
    //
    // Every non-directory entry is hashed with hasher using method. The first
    // path seen for a digest is recorded and printed; any later file with the
    // same digest is renamed into dest under its base name. dest is created on
    // demand, so it does not have to exist beforehand.
    //
    // Errors for a single entry are reported on stderr and that entry is
    // skipped. The returned map associates each digest with the first path that
    // produced it.
    //
    // NOTE: duplicates are stored flat in dest by base name, so two duplicates
    // from different directories that share a file name end up at the same
    // destination path.
    func marcher(dir string, method string, dest string) map[string]string {
        hmap := make(map[string]string) // {hash:path}
        walker := fs.Walk(dir)
        for walker.Step() {
            if err := walker.Err(); err != nil {
                fmt.Fprintln(os.Stderr, err)
                continue
            }

            path := walker.Path()
            finfo, err := os.Stat(path)
            if err != nil {
                fmt.Fprintln(os.Stderr, err)
                continue
            }
            if finfo.IsDir() {
                continue // only files are hashed
            }

            sum := hasher(path, method)
            search, ok := hmap[sum]
            if !ok {
                // First file with this digest: remember it as the original.
                hmap[sum] = path
                fmt.Println(sum, "=>", path)
                continue
            }

            // Duplicate: make sure the destination exists, then move the file.
            if err := os.MkdirAll(dest, 0755); err != nil {
                fmt.Fprintln(os.Stderr, err)
                continue
            }
            if err := os.Rename(path, filepath.Join(dest, filepath.Base(path))); err != nil {
                fmt.Fprintln(os.Stderr, err)
                continue
            }
            fmt.Println("Duplicates moved =>", search)
        }
        return hmap
    }

    // main parses the command-line flags and runs the duplicate scan.
    //
    //   -s  directory to scan (default "test")
    //   -m  hashing method, md5 or sha1 (default "sha1")
    //   -d  directory receiving the duplicates (default "doublon")
    func main() {
        var (
            source      string
            method      string
            destination string
        )
        flag.StringVar(&source, "s", "test", "Directory to scan")
        flag.StringVar(&method, "m", "sha1", "Choose hashing method : md5 or sha1")
        flag.StringVar(&destination, "d", "doublon", "Choose duplicates destination")
        flag.Parse()

        marcher(source, method, destination)
    }

Viewing all articles
Browse latest Browse all 3409

Trending Articles