Skip to content

Commit 4193616

Browse files
authored
Use gitparse for unstaged changes. (#775)
1 parent b9d6f11 commit 4193616

2 files changed

Lines changed: 82 additions & 39 deletions

File tree

pkg/gitparse/gitparse.go

Lines changed: 24 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -39,8 +39,6 @@ type Diff struct {
3939

4040
// RepoPath parses the output of the `git log` command for the `source` path.
4141
func RepoPath(ctx context.Context, source string, head string) (chan Commit, error) {
42-
commitChan := make(chan Commit)
43-
4442
args := []string{"-C", source, "log", "-p", "-U0", "--full-history", "--diff-filter=AM", "--date=format:%a %b %d %H:%M:%S %Y %z"}
4543
if head != "" {
4644
args = append(args, head)
@@ -55,6 +53,27 @@ func RepoPath(ctx context.Context, source string, head string) (chan Commit, err
5553
cmd.Env = append(cmd.Env, fmt.Sprintf("GIT_DIR=%s", filepath.Join(absPath, ".git")))
5654
}
5755

56+
return executeCommand(ctx, cmd)
57+
}
58+
59+
// Unstaged parses the output of the `git diff` command for the `source` path.
//
// It builds a `git -C <source> diff ... HEAD` command and hands it to
// executeCommand, returning that call's channel and error unchanged.
//
// NOTE(review): the diff is taken against HEAD; per the git-diff documentation
// that compares the working tree with HEAD, so staged changes would be
// included as well as unstaged ones — confirm this matches the name.
60+
func Unstaged(ctx context.Context, source string) (chan Commit, error) {
61+
// The flag set mirrors the `git log` invocation in RepoPath, with the
// subcommand swapped to `diff` and `HEAD` appended as the revision.
// NOTE(review): `--full-history` and `--date=...` are log-oriented options;
// presumably they are tolerated and have no effect on `git diff` — verify.
args := []string{"-C", source, "diff", "-p", "-U0", "--full-history", "--diff-filter=AM", "--date=format:%a %b %d %H:%M:%S %Y %z", "HEAD"}
62+
63+
// ctx is not attached to the process here (exec.Command rather than
// exec.CommandContext); it is only passed on to executeCommand below.
cmd := exec.Command("git", args...)
64+
65+
// Point git at <absolute source>/.git. If the absolute path cannot be
// resolved, the error is dropped and GIT_DIR is simply not set.
absPath, err := filepath.Abs(source)
66+
if err == nil {
67+
// NOTE(review): nothing has assigned cmd.Env before this line, so after the
// append it holds only GIT_DIR; per os/exec a non-nil Env replaces the
// inherited process environment — confirm that is intended.
cmd.Env = append(cmd.Env, fmt.Sprintf("GIT_DIR=%s", filepath.Join(absPath, ".git")))
68+
}
69+
70+
return executeCommand(ctx, cmd)
71+
}
72+
73+
// executeCommand runs an exec.Cmd, reads stdout and stderr, and waits for the Cmd to complete.
74+
func executeCommand(ctx context.Context, cmd *exec.Cmd) (chan Commit, error) {
75+
commitChan := make(chan Commit)
76+
5877
stdOut, err := cmd.StdoutPipe()
5978
if err != nil {
6079
return commitChan, err
@@ -126,6 +145,9 @@ func FromReader(ctx context.Context, stdOut io.Reader, commitChan chan Commit) {
126145
currentCommit.Date = date
127146
case isDiffLine(line):
128147
// currentCommit is nil when no commit header precedes the first diff line (e.g. `git diff` output from Unstaged), so start an empty commit.
148+
if currentCommit == nil {
149+
currentCommit = &Commit{}
150+
}
129151
if currentDiff != nil && currentDiff.Content.Len() > 0 {
130152
currentCommit.Diffs = append(currentCommit.Diffs, *currentDiff)
131153
}

pkg/sources/git/git.go

Lines changed: 58 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,7 @@
11
package git
22

33
import (
4-
"bytes"
54
"fmt"
6-
"io"
75
"io/ioutil"
86
"net/url"
97
"os"
@@ -363,48 +361,77 @@ func (s *Git) ScanCommits(ctx context.Context, repo *git.Repository, path string
363361
return nil
364362
}
365363

366-
func (s *Git) ScanUnstaged(repo *git.Repository, scanOptions *ScanOptions, chunksChan chan *sources.Chunk) error {
364+
// ScanUnstaged chunks unstaged changes.
365+
func (s *Git) ScanUnstaged(ctx context.Context, repo *git.Repository, path string, scanOptions *ScanOptions, chunksChan chan *sources.Chunk) error {
367366
// get the URL metadata for reporting (may be empty)
368367
urlMetadata := getSafeRemoteURL(repo, "origin")
369368

370-
// Also scan any unstaged changes in the working tree of the repo
371-
_, err := repo.Head()
372-
if err == nil || err == plumbing.ErrReferenceNotFound {
373-
wt, err := repo.Worktree()
374-
if err != nil {
375-
log.WithError(err).Error("error obtaining repo worktree")
376-
return err
377-
}
369+
commitChan, err := gitparse.Unstaged(ctx, path)
370+
if err != nil {
371+
return err
372+
}
373+
if commitChan == nil {
374+
return nil
375+
}
378376

379-
status, err := wt.Status()
380-
if err != nil {
381-
log.WithError(err).Error("error obtaining worktree status")
382-
return err
383-
}
384-
for fh := range status {
385-
if !scanOptions.Filter.Pass(fh) {
377+
var depth int64
378+
var reachedBase = false
379+
log.Debugf("Scanning repo")
380+
for commit := range commitChan {
381+
for _, diff := range commit.Diffs {
382+
log.WithField("commit", commit.Hash).WithField("file", diff.PathB).Trace("Scanning file from git")
383+
if scanOptions.MaxDepth > 0 && depth >= scanOptions.MaxDepth {
384+
log.Debugf("reached max depth")
385+
break
386+
}
387+
depth++
388+
if reachedBase && commit.Hash != scanOptions.BaseHash {
389+
break
390+
}
391+
if len(scanOptions.BaseHash) > 0 {
392+
if commit.Hash == scanOptions.BaseHash {
393+
log.Debugf("Reached base commit. Finishing scanning files.")
394+
reachedBase = true
395+
}
396+
}
397+
398+
if !scanOptions.Filter.Pass(diff.PathB) {
386399
continue
387400
}
388-
metadata := s.sourceMetadataFunc(
389-
fh, "unstaged", "unstaged", time.Now().String(), urlMetadata, 0,
390-
)
391401

392-
fileBuf := bytes.NewBuffer(nil)
393-
fileHandle, err := wt.Filesystem.Open(fh)
394-
if err != nil {
402+
fileName := diff.PathB
403+
if fileName == "" {
395404
continue
396405
}
397-
defer fileHandle.Close()
398-
_, err = io.Copy(fileBuf, fileHandle)
399-
if err != nil {
406+
var email, hash, when string
407+
email = commit.Author
408+
hash = commit.Hash
409+
when = commit.Date.String()
410+
411+
// Handle binary files by reading the entire file rather than using the diff.
412+
if diff.IsBinary {
413+
commitHash := plumbing.NewHash(hash)
414+
metadata := s.sourceMetadataFunc(fileName, email, "Unstaged", when, urlMetadata, 0)
415+
chunkSkel := &sources.Chunk{
416+
SourceName: s.sourceName,
417+
SourceID: s.sourceID,
418+
SourceType: s.sourceType,
419+
SourceMetadata: metadata,
420+
Verify: s.verify,
421+
}
422+
if err := handleBinary(repo, chunksChan, chunkSkel, commitHash, fileName); err != nil {
423+
log.WithError(err).WithField("file", fileName).Debug("Error handling binary file")
424+
}
400425
continue
401426
}
427+
428+
metadata := s.sourceMetadataFunc(fileName, email, "Unstaged", when, urlMetadata, int64(diff.LineStart))
402429
chunksChan <- &sources.Chunk{
403-
SourceType: s.sourceType,
404430
SourceName: s.sourceName,
405431
SourceID: s.sourceID,
406-
Data: fileBuf.Bytes(),
432+
SourceType: s.sourceType,
407433
SourceMetadata: metadata,
434+
Data: diff.Content.Bytes(),
408435
Verify: s.verify,
409436
}
410437
}
@@ -417,14 +444,8 @@ func (s *Git) ScanRepo(ctx context.Context, repo *git.Repository, repoPath strin
417444
if err := s.ScanCommits(ctx, repo, repoPath, scanOptions, chunksChan); err != nil {
418445
return err
419446
}
420-
if err := s.ScanUnstaged(repo, scanOptions, chunksChan); err != nil {
421-
// https://github.com/src-d/go-git/issues/879
422-
if strings.Contains(err.Error(), "object not found") {
423-
log.WithError(err).Error("known issue: probably caused by a dangling reference in the repo")
424-
} else {
425-
return errors.New(err)
426-
}
427-
return err
447+
if err := s.ScanUnstaged(ctx, repo, repoPath, scanOptions, chunksChan); err != nil {
448+
log.WithError(err).Error("Error scanning unstaged changes")
428449
}
429450
scanTime := time.Now().UnixNano() - start
430451
log.Debugf("Scanning complete. Scan time: %f", time.Duration(scanTime).Seconds())

0 commit comments

Comments
 (0)