-
-
Notifications
You must be signed in to change notification settings - Fork 1.1k
feat: Adding cas support to catalog #3929
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
c20168c
a521804
cb731e6
1a54f04
6d70093
0b592ea
52a9cd1
4802efe
5629b2c
9264dc3
15d4fa3
629f98b
926fc74
add47f5
67ce742
1f4704f
95f3919
28aad5a
7ab6f05
477e496
4674e3f
1549810
0ee6b48
23192d4
46bf743
172901f
06c4fe3
09950d8
b2650f2
996d9ad
28e4e69
ac38312
acebb90
d69785f
557290b
2196a3a
18b7990
5f2ab42
8a66be5
d0f60f7
27daacb
21996f6
8f9b5ce
b3b4d6f
5e692a5
97d7859
60e7b2b
4c53664
9b1a4fc
456c68c
53d2566
12fe862
adf258f
cb71191
58f3ea3
3844890
baa76b0
1e0805d
ebe538c
87617ad
edd85b1
428d562
46b8000
c6efc57
f50e994
49291b1
5086658
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -3,7 +3,6 @@ package module | |||||
| import ( | ||||||
| "context" | ||||||
| "fmt" | ||||||
| "net/url" | ||||||
| "os" | ||||||
| "path/filepath" | ||||||
| "regexp" | ||||||
|
|
@@ -13,10 +12,11 @@ import ( | |||||
|
|
||||||
| "github.com/gitsight/go-vcsurl" | ||||||
| "github.com/gruntwork-io/go-commons/files" | ||||||
| "github.com/gruntwork-io/terragrunt/internal/cas" | ||||||
| "github.com/gruntwork-io/terragrunt/internal/errors" | ||||||
| "github.com/gruntwork-io/terragrunt/pkg/log" | ||||||
| "github.com/gruntwork-io/terragrunt/tf" | ||||||
| "github.com/hashicorp/go-getter" | ||||||
| "github.com/hashicorp/go-getter/v2" | ||||||
| "gopkg.in/ini.v1" | ||||||
| ) | ||||||
|
|
||||||
|
|
@@ -27,13 +27,17 @@ const ( | |||||
| azuredevHost = "dev.azure.com" | ||||||
| bitbucketHost = "bitbucket.org" | ||||||
| gitlabSelfHostedRegex = `^(gitlab\.(.+))$` | ||||||
|
|
||||||
| cloneCompleteSentinel = ".catalog-clone-complete" | ||||||
| ) | ||||||
|
|
||||||
| var ( | ||||||
| gitHeadBranchNameReg = regexp.MustCompile(`^.*?([^/]+)$`) | ||||||
| repoNameFromCloneURLReg = regexp.MustCompile(`(?i)^.*?([-a-z_.]+)[^/]*?(?:\.git)?$`) | ||||||
| repoNameFromCloneURLReg = regexp.MustCompile(`(?i)^.*?([-a-z0-9_.]+)[^/]*?(?:\.git)?$`) | ||||||
|
|
||||||
| modulesPaths = []string{"modules"} | ||||||
|
|
||||||
| includedGitFiles = []string{"HEAD", "config"} | ||||||
| ) | ||||||
|
|
||||||
| type Repo struct { | ||||||
|
|
@@ -46,17 +50,19 @@ type Repo struct { | |||||
| BranchName string | ||||||
|
|
||||||
| walkWithSymlinks bool | ||||||
| allowCAS bool | ||||||
| } | ||||||
|
|
||||||
| func NewRepo(ctx context.Context, logger log.Logger, cloneURL, tempDir string, walkWithSymlinks bool) (*Repo, error) { | ||||||
| func NewRepo(ctx context.Context, l log.Logger, cloneURL, path string, walkWithSymlinks bool, allowCAS bool) (*Repo, error) { | ||||||
| repo := &Repo{ | ||||||
| logger: logger, | ||||||
| logger: l, | ||||||
| cloneURL: cloneURL, | ||||||
| path: tempDir, | ||||||
| path: path, | ||||||
| walkWithSymlinks: walkWithSymlinks, | ||||||
| allowCAS: allowCAS, | ||||||
| } | ||||||
|
|
||||||
| if err := repo.clone(ctx); err != nil { | ||||||
| if err := repo.clone(ctx, l); err != nil { | ||||||
| return nil, err | ||||||
| } | ||||||
|
|
||||||
|
|
@@ -163,69 +169,164 @@ func (repo *Repo) ModuleURL(moduleDir string) (string, error) { | |||||
| return "", errors.Errorf("hosting: %q is not supported yet", remote.Host) | ||||||
| } | ||||||
|
|
||||||
| // clone clones the repository to a temporary directory if the repoPath is URL | ||||||
| func (repo *Repo) clone(ctx context.Context) error { | ||||||
| type CloneOptions struct { | ||||||
| SourceURL string | ||||||
| TargetPath string | ||||||
| Context context.Context | ||||||
| Logger log.Logger | ||||||
| } | ||||||
|
|
||||||
| func (repo *Repo) clone(ctx context.Context, l log.Logger) error { | ||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. For what do you pass |
||||||
| cloneURL, err := repo.resolveCloneURL() | ||||||
| if err != nil { | ||||||
| return err | ||||||
| } | ||||||
|
|
||||||
| // Handle local directory case | ||||||
| if files.IsDir(cloneURL) { | ||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
I know it's not your code, but we need to use one thing everywhere, if |
||||||
| return repo.handleLocalDir(cloneURL) | ||||||
| } | ||||||
|
|
||||||
| // Prepare clone options | ||||||
| opts := CloneOptions{ | ||||||
| SourceURL: cloneURL, | ||||||
| TargetPath: repo.path, | ||||||
| Context: ctx, | ||||||
| Logger: repo.logger, | ||||||
| } | ||||||
|
|
||||||
| if err := repo.prepareCloneDirectory(); err != nil { | ||||||
| return err | ||||||
| } | ||||||
|
|
||||||
| if repo.cloneCompleted() { | ||||||
| repo.logger.Debugf("The repo dir exists and %q exists. Skipping cloning.", cloneCompleteSentinel) | ||||||
|
|
||||||
| return nil | ||||||
| } | ||||||
|
|
||||||
| return repo.performClone(ctx, l, &opts) | ||||||
| } | ||||||
|
|
||||||
| func (repo *Repo) resolveCloneURL() (string, error) { | ||||||
| if repo.cloneURL == "" { | ||||||
| currentDir, err := os.Getwd() | ||||||
| if err != nil { | ||||||
| return errors.New(err) | ||||||
| return "", errors.New(err) | ||||||
| } | ||||||
|
|
||||||
| repo.cloneURL = currentDir | ||||||
| return currentDir, nil | ||||||
| } | ||||||
|
|
||||||
| if repoPath := repo.cloneURL; files.IsDir(repoPath) { | ||||||
| if !filepath.IsAbs(repoPath) { | ||||||
| absRepoPath, err := filepath.Abs(repoPath) | ||||||
| if err != nil { | ||||||
| return errors.New(err) | ||||||
| } | ||||||
| return repo.cloneURL, nil | ||||||
| } | ||||||
|
Comment on lines
+211
to
+222
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🛠️ Refactor suggestion Double-check potential data loss during re-cloning.
Also applies to: 237-253, 255-273 |
||||||
|
|
||||||
| repo.logger.Debugf("Converting relative path %q to absolute %q", repoPath, absRepoPath) | ||||||
| func (repo *Repo) handleLocalDir(repoPath string) error { | ||||||
| if !filepath.IsAbs(repoPath) { | ||||||
| absRepoPath, err := filepath.Abs(repoPath) | ||||||
| if err != nil { | ||||||
| return errors.New(err) | ||||||
| } | ||||||
|
|
||||||
| repo.path = repoPath | ||||||
| repo.logger.Debugf("Converting relative path %q to absolute %q", repoPath, absRepoPath) | ||||||
| repo.path = absRepoPath | ||||||
|
|
||||||
| return nil | ||||||
| } | ||||||
|
|
||||||
| repo.path = repoPath | ||||||
|
|
||||||
| return nil | ||||||
| } | ||||||
|
|
||||||
| func (repo *Repo) prepareCloneDirectory() error { | ||||||
| if err := os.MkdirAll(repo.path, os.ModePerm); err != nil { | ||||||
| return errors.New(err) | ||||||
| } | ||||||
|
|
||||||
| repoName := repo.extractRepoName() | ||||||
| repo.path = filepath.Join(repo.path, repoName) | ||||||
|
|
||||||
| // Clean up incomplete clones | ||||||
| if repo.shouldCleanupIncompleteClone() { | ||||||
| repo.logger.Debugf("The repo dir exists but %q does not. Removing the repo dir for cloning from the remote source.", cloneCompleteSentinel) | ||||||
|
|
||||||
| if err := os.RemoveAll(repo.path); err != nil { | ||||||
| return errors.New(err) | ||||||
| } | ||||||
| } | ||||||
|
|
||||||
| return nil | ||||||
| } | ||||||
|
|
||||||
| func (repo *Repo) extractRepoName() string { | ||||||
| repoName := "temp" | ||||||
| if match := repoNameFromCloneURLReg.FindStringSubmatch(repo.cloneURL); len(match) > 0 && match[1] != "" { | ||||||
| repoName = match[1] | ||||||
| } | ||||||
|
|
||||||
| repo.path = filepath.Join(repo.path, repoName) | ||||||
| return repoName | ||||||
| } | ||||||
|
|
||||||
| // Since we are cloning the repository into a temporary directory, some operating systems such as MacOS have a service for deleting files that have not been accessed for a long time. | ||||||
| // For example, on macOS the service responsible for deleting unused files removes only the files while leaving the directory structure untouched, which in turn misleads `go-getter`: it thinks that the repository exists but cannot update it due to the missing files. In such cases, we simply delete the temporary directory in order to clone the repository again. | ||||||
| // See https://github.com/gruntwork-io/terragrunt/pull/2888 | ||||||
| if files.FileExists(repo.path) && !files.FileExists(repo.gitHeadfile()) { | ||||||
| repo.logger.Debugf("The repo dir exists but git file %q does not. Removing the repo dir for cloning from the remote source.", repo.gitHeadfile()) | ||||||
| func (repo *Repo) shouldCleanupIncompleteClone() bool { | ||||||
| return files.FileExists(repo.path) && !repo.cloneCompleted() | ||||||
| } | ||||||
|
|
||||||
| if err := os.RemoveAll(repo.path); err != nil { | ||||||
| return errors.New(err) | ||||||
| func (repo *Repo) cloneCompleted() bool { | ||||||
| return files.FileExists(filepath.Join(repo.path, cloneCompleteSentinel)) | ||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
| } | ||||||
|
|
||||||
| func (repo *Repo) performClone(ctx context.Context, l log.Logger, opts *CloneOptions) error { | ||||||
| client := getter.DefaultClient | ||||||
|
|
||||||
| if repo.allowCAS { | ||||||
| c, err := cas.New(cas.Options{}) | ||||||
| if err != nil { | ||||||
| return err | ||||||
| } | ||||||
|
|
||||||
| cloneOpts := cas.CloneOptions{ | ||||||
| Dir: repo.path, | ||||||
| IncludedGitFiles: includedGitFiles, | ||||||
| } | ||||||
|
|
||||||
| client.Getters = append([]getter.Getter{cas.NewCASGetter(&l, c, &cloneOpts)}, client.Getters...) | ||||||
| } | ||||||
|
|
||||||
| sourceURL, err := tf.ToSourceURL(repo.cloneURL, "") | ||||||
| sourceURL, err := tf.ToSourceURL(opts.SourceURL, "") | ||||||
| if err != nil { | ||||||
| return err | ||||||
| } | ||||||
|
|
||||||
| repo.cloneURL = sourceURL.String() | ||||||
| opts.Logger.Infof("Cloning repository %q to temporary directory %q", repo.cloneURL, repo.path) | ||||||
|
|
||||||
| repo.logger.Infof("Cloning repository %q to temporary directory %q", repo.cloneURL, repo.path) | ||||||
| // Check first if the query param ref is already set | ||||||
| q := sourceURL.Query() | ||||||
|
|
||||||
| // We need to explicitly specify the reference, otherwise we will get an error: | ||||||
| // "fatal: The empty string is not a valid pathspec. Use . instead if you wanted to match all paths" | ||||||
| // when updating an existing repository. | ||||||
| sourceURL.RawQuery = (url.Values{"ref": []string{"HEAD"}}).Encode() | ||||||
| ref := q.Get("ref") | ||||||
| if ref != "" { | ||||||
| q.Set("ref", "HEAD") | ||||||
| } | ||||||
|
|
||||||
| sourceURL.RawQuery = q.Encode() | ||||||
|
|
||||||
| if err := getter.Get(repo.path, strings.Trim(sourceURL.String(), "/"), getter.WithContext(ctx), getter.WithMode(getter.ClientModeDir)); err != nil { | ||||||
| _, err = client.Get(ctx, &getter.Request{ | ||||||
| Src: sourceURL.String(), | ||||||
| Dst: repo.path, | ||||||
| GetMode: getter.ModeDir, | ||||||
| }) | ||||||
| if err != nil { | ||||||
| return err | ||||||
| } | ||||||
|
|
||||||
| // Create the sentinel file to indicate that the clone is complete | ||||||
| f, err := os.Create(filepath.Join(repo.path, cloneCompleteSentinel)) | ||||||
| if err != nil { | ||||||
| return errors.New(err) | ||||||
| } | ||||||
|
|
||||||
| if err := f.Close(); err != nil { | ||||||
| return errors.New(err) | ||||||
| } | ||||||
|
|
||||||
|
|
@@ -237,7 +338,7 @@ func (repo *Repo) parseRemoteURL() error { | |||||
| gitConfigPath := filepath.Join(repo.path, ".git", "config") | ||||||
|
|
||||||
| if !files.FileExists(gitConfigPath) { | ||||||
| return errors.Errorf("the specified path %q is not a git repository", repo.path) | ||||||
| return errors.Errorf("the specified path %q is not a git repository (no .git/config file found)", repo.path) | ||||||
| } | ||||||
|
|
||||||
| repo.logger.Debugf("Parsing git config %q", gitConfigPath) | ||||||
|
|
@@ -280,11 +381,12 @@ func (repo *Repo) gitHeadfile() string { | |||||
| func (repo *Repo) parseBranchName() error { | ||||||
| data, err := files.ReadFileAsString(repo.gitHeadfile()) | ||||||
| if err != nil { | ||||||
| return errors.Errorf("the specified path %q is not a git repository", repo.path) | ||||||
| return errors.Errorf("the specified path %q is not a git repository (no .git/HEAD file found)", repo.path) | ||||||
| } | ||||||
|
|
||||||
| if match := gitHeadBranchNameReg.FindStringSubmatch(data); len(match) > 0 { | ||||||
| repo.BranchName = strings.TrimSpace(match[1]) | ||||||
|
|
||||||
| return nil | ||||||
| } | ||||||
|
|
||||||
|
|
||||||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,39 @@ | ||
| --- | ||
| title: Content Addressable Store (CAS) | ||
| description: Learn how Terragrunt supports deduplication of content using a Content Addressable Store (CAS). | ||
| slug: docs/features/cas | ||
| sidebar: | ||
| order: 14 | ||
| --- | ||
|
|
||
| Terragrunt supports a Content Addressable Store (CAS) to deduplicate content across multiple Terragrunt configurations. This feature is still experimental and not recommended for general production usage. | ||
|
|
||
| At the moment, the only supported use case for the CAS is to speed up catalog cloning. In the future, the CAS can be used to store more content. | ||
|
|
||
| To use the CAS, you will need to enable the [cas](/docs/reference/experiments/#cas) experiment. | ||
|
|
||
| ## Usage | ||
|
|
||
| When you enable the `cas` experiment, Terragrunt will automatically use the CAS when cloning any compatible source (right now, only Git repositories). | ||
|
|
||
| ```hcl | ||
| # root.hcl | ||
|
|
||
| catalog { | ||
| urls = [ | ||
| "git@github.com:acme/modules.git" | ||
| ] | ||
| } | ||
| ``` | ||
|
|
||
| When Terragrunt clones a repository while using the CAS and the repository is not found in the CAS, Terragrunt will clone the repository from the original URL and store it in the CAS for future use. | ||
|
|
||
| When generating a repository from the CAS, Terragrunt will hard link entries from the CAS to the new repository. This allows Terragrunt to deduplicate content across multiple repositories. | ||
|
|
||
| In the event that hard linking fails due to some operating system / host incompatibility with hard links, Terragrunt will fall back to performing copies of the content from the CAS. | ||
|
|
||
| ## Storage | ||
|
|
||
| The CAS is stored in the `~/.cache/terragrunt/cas` directory. This directory can be safely deleted at any time, as Terragrunt will automatically regenerate the CAS as needed. | ||
|
|
||
| Avoid carelessly deleting only part of the CAS directory, as that might result in partially cloned repositories and unexpected behavior. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
👍