Merged
Changes from 1 commit
27 commits
79f42d3
feat(plugin-essentials): yarn deduplicate
paul-soporan Jul 8, 2020
f1ce7d2
refactor: improve implementation
paul-soporan Jul 8, 2020
05781f1
Merge branch 'master' into paul/feat/deduplicate
paul-soporan Aug 17, 2020
4303e0d
refactor: shorten command name to `yarn dedupe`
paul-soporan Aug 17, 2020
408acbf
refactor: rewrite algorithm
paul-soporan Aug 17, 2020
7063670
refactor: more refactoring
paul-soporan Aug 17, 2020
d5a2291
refactor: even more refactoring
paul-soporan Aug 17, 2020
925112a
refactor: more refactoring + improved docs
paul-soporan Aug 19, 2020
ebfd0c1
docs: better dedupe docs
paul-soporan Aug 19, 2020
e8df62b
test: add tests
paul-soporan Aug 19, 2020
658055c
test: add tests for selective dedupe
paul-soporan Aug 19, 2020
ea4cc55
Merge remote-tracking branch 'yarnpkg/master' into paul/feat/deduplicate
paul-soporan Aug 19, 2020
bb60e25
refactor: refactor everything yet again
paul-soporan Aug 20, 2020
cc91662
refactor: fix cyclic dependency
paul-soporan Aug 20, 2020
51b342e
Update packages/plugin-essentials/sources/commands/dedupe.ts
paul-soporan Aug 22, 2020
4379dbe
Update packages/plugin-essentials/sources/commands/dedupe.ts
paul-soporan Aug 22, 2020
d637d8e
refactor: dedupeSkip -> null
paul-soporan Aug 22, 2020
1a705f5
refactor: skip -> assertion
paul-soporan Aug 22, 2020
cab447e
refactor: add note about virtual packages
paul-soporan Aug 22, 2020
d6b9a58
refactor: skip ->assertion failed
paul-soporan Aug 22, 2020
58ac353
Merge branch 'master' into paul/feat/deduplicate
paul-soporan Aug 22, 2020
9eb67d8
refactor: disable getSatisfying for some resolvers
paul-soporan Aug 23, 2020
aaf702f
refactor: remove resolutionDependencies
paul-soporan Aug 23, 2020
2465986
chore: lint
paul-soporan Aug 23, 2020
1fc26fa
refactor: skip -> assertion failed
paul-soporan Aug 23, 2020
d05b284
Apply suggestions from code review
paul-soporan Aug 23, 2020
50f0c51
refactor: use semver.SemVer instances
paul-soporan Aug 23, 2020
refactor: rewrite algorithm
paul-soporan committed Aug 17, 2020
commit 408acbfbb618e48f43b95066ba45fdf286e847d8
289 changes: 130 additions & 159 deletions packages/plugin-essentials/sources/commands/dedupe.ts
@@ -4,17 +4,116 @@
* - https://github.com/eps1lon/yarn-plugin-deduplicate
*/

import {BaseCommand} from '@yarnpkg/cli';
import {Configuration, Project, ResolveOptions, ThrowReport, Cache, StreamReport, structUtils, IdentHash, LocatorHash, MessageName, Report, DescriptorHash, Locator} from '@yarnpkg/core';
import {Command} from 'clipanion';
import micromatch from 'micromatch';
import semver from 'semver';
import {BaseCommand} from '@yarnpkg/cli';
import {Configuration, Project, ResolveOptions, ThrowReport, Cache, StreamReport, Resolver, miscUtils, Descriptor, Package} from '@yarnpkg/core';
import {structUtils, IdentHash, LocatorHash, MessageName, Report, Fetcher, FetchOptions} from '@yarnpkg/core';
import {Command} from 'clipanion';
import micromatch from 'micromatch';

export const dedupeSkip = Symbol(`dedupeSkip`);

export type DedupePromise = Promise<{
descriptor: Descriptor,
currentPackage: Package,
updatedPackage: Package,
} | typeof dedupeSkip>;

export type DedupeAlgorithm = (project: Project, patterns: Array<string>, opts: {
resolver: Resolver,
resolveOptions: ResolveOptions,
fetcher: Fetcher,
fetchOptions: FetchOptions,
report: Report,
}) => Promise<Array<DedupePromise>>;

export enum Strategy {
Highest = `highest`,
}

export const DEDUPE_ALGORITHMS: Record<Strategy, DedupeAlgorithm> = {
highest: async (project, patterns, {resolver, fetcher, resolveOptions, fetchOptions, report}) => {
const locatorsByIdent = new Map<IdentHash, Set<LocatorHash>>();
for (const [descriptorHash, locatorHash] of project.storedResolutions) {
const descriptor = project.storedDescriptors.get(descriptorHash);
if (!descriptor)
throw new Error(`Assertion failed: The descriptor (${descriptorHash}) should have been registered`);

miscUtils.getSetWithDefault(locatorsByIdent, descriptor.identHash).add(locatorHash);
}

return Array.from(project.storedDescriptors.values(), async descriptor => {
if (structUtils.isVirtualDescriptor(descriptor))
return dedupeSkip;

if (patterns.length && !micromatch.isMatch(structUtils.stringifyIdent(descriptor), patterns))
return dedupeSkip;

const currentResolution = project.storedResolutions.get(descriptor.descriptorHash);
if (typeof currentResolution === `undefined`)
return dedupeSkip;

// We only care about resolutions that are stored in the lockfile
const currentPackage = project.originalPackages.get(currentResolution);
if (typeof currentPackage === `undefined`)
return dedupeSkip;

// No need to try deduping packages that are not persisted,
// they will be resolved again anyways
if (!resolver.shouldPersistResolution(currentPackage, resolveOptions))
return dedupeSkip;

const locators = locatorsByIdent.get(descriptor.identHash);
if (typeof locators === `undefined`)
return dedupeSkip;

// No need to choose when there's only one possibility
if (locators.size === 1)
return dedupeSkip;

const resolutionDependencies = resolver.getResolutionDependencies(descriptor, resolveOptions);
const dependencies = new Map(
resolutionDependencies.map(dependency => {
const resolution = project.storedResolutions.get(dependency.descriptorHash);
if (typeof resolution === `undefined`)
throw new Error(`Assertion failed: The resolution (${structUtils.prettyDescriptor(project.configuration, dependency)}) should have been registered`);

const pkg = project.storedPackages.get(resolution);
if (typeof pkg === `undefined`)
throw new Error(`Assertion failed: The package (${resolution}) should have been registered`);

return [dependency.descriptorHash, pkg] as const;
})
);

const candidates = await resolver.getCandidates(descriptor, dependencies, resolveOptions);

const bestCandidate = candidates.find(({locatorHash}) => locators.has(locatorHash));
if (typeof bestCandidate === `undefined`)
return dedupeSkip;

const updatedResolution = bestCandidate.locatorHash;

// We only care about resolutions that are stored in the lockfile
const updatedPackage = project.originalPackages.get(updatedResolution);
if (typeof updatedPackage === `undefined`)
return dedupeSkip;

if (updatedResolution === currentResolution)
return dedupeSkip;

return {descriptor, currentPackage, updatedPackage};
});
},
};

// eslint-disable-next-line arca/no-default-export
export default class DedupeCommand extends BaseCommand {
@Command.Rest()
patterns: Array<string> = [];

@Command.String(`-s,--strategy`)
strategy: Strategy = Strategy.Highest;

@Command.Boolean(`--check`)
check: boolean = false;

@@ -61,7 +160,7 @@ export default class DedupeCommand extends BaseCommand {
stdout: this.context.stdout,
json: this.json,
}, async report => {
await deduplicate(project, this.patterns, {cache, report});
await deduplicate(this.strategy, project, this.patterns, {cache, report});
});

if (this.check) {
@@ -80,161 +179,43 @@ export default class DedupeCommand extends BaseCommand {
}
}

export interface DeduplicationFactors {
locatorsByIdent: Map<IdentHash, Set<LocatorHash>>;
}

export function getDeduplicationFactors(project: Project): DeduplicationFactors {
const locatorsByIdent = new Map<IdentHash, Set<LocatorHash>>();
for (const [descriptorHash, locatorHash] of project.storedResolutions) {
const descriptor = project.storedDescriptors.get(descriptorHash);
if (!descriptor)
throw new Error(`Assertion failed: The descriptor (${descriptorHash}) should have been registered`);

if (!locatorsByIdent.has(descriptor.identHash))
locatorsByIdent.set(descriptor.identHash, new Set());
locatorsByIdent.get(descriptor.identHash)!.add(locatorHash);
}

return {locatorsByIdent};
}

export async function deduplicate(project: Project, patterns: Array<string>, {cache, report}: {cache: Cache, report: Report}) {
export async function deduplicate(strategy: Strategy, project: Project, patterns: Array<string>, {cache, report}: {cache: Cache, report: Report}) {
const {configuration} = project;
const throwReport = new ThrowReport();

const resolver = configuration.makeResolver();
const fetcher = configuration.makeFetcher();

const fetchOptions: FetchOptions = {
cache,
checksums: project.storedChecksums,
fetcher,
project,
report: throwReport,
skipIntegrityCheck: true,
};
const resolveOptions: ResolveOptions = {
project,
resolver,
report: throwReport,
fetchOptions: {
project,
report: throwReport,
fetcher,
cache,
checksums: project.storedChecksums,
skipIntegrityCheck: true,
},
fetchOptions,
};

await report.startTimerPromise(`Deduplication step`, async () => {
// We deduplicate in multiple passes because deduplicating a package can cause
// its dependencies - if unused - to be removed from the project.storedDescriptors
// map, which, in turn, can unlock more deduplication possibilities

let currentPassIdents: Array<IdentHash> = [];
let nextPassIdents: Array<IdentHash> = [...project.storedDescriptors.values()].map(({identHash}) => identHash);

// We cache the resolved candidates to speed up subsequent iterations
const candidateCache = new Map<DescriptorHash, Array<Locator>>();

while (nextPassIdents.length > 0) {
// We resolve the dependency tree between passes to get rid of unused dependencies
await project.resolveEverything({cache, resolver, report: throwReport, lockfileOnly: false});

currentPassIdents = nextPassIdents;
nextPassIdents = [];

const progress = StreamReport.progressViaCounter(project.storedDescriptors.size);
report.reportProgress(progress);

// We can deduplicate descriptors in parallel (which is 4x faster) because,
// even though we work on the same project instance, we only update their
// resolutions; race conditions are possible when computing the deduplication
// factors (the computed best deduplication candidate not being the best anymore),
// but, because we deduplicate in multiple passes, these problems will solve
// themselves in the next iterations
await Promise.all(
[...project.storedDescriptors.entries()].map(async ([descriptorHash, descriptor]) => {
try {
if (structUtils.isVirtualDescriptor(descriptor))
return;

if (!currentPassIdents.includes(descriptor.identHash))
return;

if (patterns.length && !micromatch.isMatch(structUtils.stringifyIdent(descriptor), patterns))
return;

const currentResolution = project.storedResolutions.get(descriptorHash);
if (!currentResolution)
return;

// We only care about resolutions that are stored in the lockfile
const currentPackage = project.originalPackages.get(currentResolution);
if (!currentPackage)
return;

// No need to try deduplicating packages that are not persisted,
// because they will be resolved again anyways
if (!resolver.shouldPersistResolution(currentPackage, resolveOptions))
return;

const {locatorsByIdent} = getDeduplicationFactors(project);

const locators = locatorsByIdent.get(descriptor.identHash);
if (!locators)
return;

// No need to choose when there's only one possibility
if (locators.size === 1)
return;

const sortedLocators = [...locators].sort((a, b) => {
const aPackage = project.storedPackages.get(a);
const bPackage = project.storedPackages.get(b);
const algorithm = DEDUPE_ALGORITHMS[strategy];
const dedupePromises = await algorithm(project, patterns, {resolver, resolveOptions, fetcher, fetchOptions, report});

if (!aPackage?.version && !bPackage?.version)
return 0;
const progress = StreamReport.progressViaCounter(dedupePromises.length);
report.reportProgress(progress);

if (!aPackage?.version)
return -1;

if (!bPackage?.version)
return 1;

return semver.compare(aPackage.version, bPackage.version);
}).reverse();

const resolutionDependencies = resolver.getResolutionDependencies(descriptor, resolveOptions);
const dependencies = new Map(
resolutionDependencies.map(dependency => {
const resolution = project.storedResolutions.get(dependency.descriptorHash);
if (!resolution)
throw new Error(`Assertion failed: The resolution (${structUtils.prettyDescriptor(configuration, dependency)}) should have been registered`);

const pkg = project.storedPackages.get(resolution);
if (!pkg)
throw new Error(`Assertion failed: The package (${resolution}) should have been registered`);

return [dependency.descriptorHash, pkg] as const;
})
);

let candidates: Array<Locator>;
if (candidateCache.has(descriptorHash)) {
candidates = candidateCache.get(descriptorHash)!;
} else {
candidates = await resolver.getCandidates(descriptor, dependencies, resolveOptions);
candidateCache.set(descriptorHash, candidates);
}

const updatedResolution = sortedLocators.find(
locatorHash => candidates.map(({locatorHash}) => locatorHash).includes(locatorHash)
);
if (!updatedResolution)
await Promise.all(
dedupePromises.map(dedupePromise =>
dedupePromise
.then(dedupe => {
if (dedupe === dedupeSkip)
return;

// We only care about resolutions that are stored in the lockfile
const updatedPackage = project.originalPackages.get(updatedResolution);
if (!updatedPackage)
return;

if (updatedResolution === currentResolution)
return;
const {descriptor, currentPackage, updatedPackage} = dedupe;

report.reportWarning(
MessageName.UNNAMED,
@@ -253,20 +234,10 @@ export async function deduplicate(project: Project, patterns: Array<string>, {ca
updatedResolution: structUtils.stringifyLocator(updatedPackage),
});

project.storedResolutions.set(descriptorHash, updatedResolution);

// We schedule some idents for the next pass
nextPassIdents.push(
// We try deduplicating the current package even further
descriptor.identHash,
// We also try deduplicating its dependencies
...currentPackage.dependencies.keys(),
);
} finally {
progress.tick();
}
})
);
}
project.storedResolutions.set(descriptor.descriptorHash, updatedPackage.locatorHash);
})
.finally(() => progress.tick())
)
);
});
}
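
For reference, here is a minimal, self-contained sketch of the selection idea behind the `highest` strategy introduced in this commit: the locators already stored in the lockfile are grouped by ident, and each descriptor is re-pointed at the first resolver candidate that is already in that set. The types and the `pickDedupeTarget` helper below are simplified stand-ins for illustration, not the actual `@yarnpkg/core` APIs.

// Simplified stand-in type; the real implementation works on the Descriptor,
// Locator, and Project structures from @yarnpkg/core.
type LocatorHash = string;

function pickDedupeTarget(
  currentResolution: LocatorHash,
  locatorsInLockfile: Set<LocatorHash>,
  candidatesBestFirst: Array<LocatorHash>,
): LocatorHash | null {
  // The resolver returns candidates ordered from most to least preferred, so
  // the first one already present in the lockfile is the highest resolution
  // that can be reused without adding a new entry.
  const best = candidatesBestFirst.find(hash => locatorsInLockfile.has(hash));
  if (best === undefined || best === currentResolution)
    return null; // nothing to dedupe for this descriptor
  return best;
}

// Example: a range currently resolved to foo@1.2.0 gets re-pointed at
// foo@1.3.0, because 1.3.0 is already in the lockfile and is preferred.
const target = pickDedupeTarget(
  `foo@1.2.0`,
  new Set([`foo@1.2.0`, `foo@1.3.0`]),
  [`foo@1.3.0`, `foo@1.2.0`],
);
// target === `foo@1.3.0`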