Skip to content

Commit 60aaca6

Browse files
committed Apr 6, 2023
Auto merge of #11937 - arlosi:fuzzy, r=ehuss
Stop using UncanonicalizedIter for QueryKind::Exact

`QueryKind::Exact` causes unnecessary HTTP requests when querying for crates that don't exist. Even though the query is `Exact`, when fetching `Summaries`, Cargo still uses `UncanonicalizedIter` and requests additional possible crate names if the first one isn't found.

This change moves the use of `UncanonicalizedIter` further up the call stack, to a point where the `QueryKind` is available, so that the additional queries are only made for `QueryKind::Fuzzy`.

The impact is most noticeable when publishing a new crate whose name contains `-` or `_`. Because Cargo waits for a publish to complete by repeatedly querying the registry, a crate named `my-example-test-crate` currently causes Cargo to make 8 HTTP requests each second while waiting for the crate to become available. With this fix, Cargo makes only 1 request per second.

cc #11934
2 parents 41f7888 + a888c94 commit 60aaca6

File tree

3 files changed

+107
-78
lines changed

3 files changed

+107
-78
lines changed
 

‎src/cargo/sources/registry/index.rs

+27-52
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,6 @@
6666
//! details like invalidating caches and whatnot which are handled below, but
6767
//! hopefully those are more obvious inline in the code itself.
6868
69-
use crate::core::dependency::Dependency;
7069
use crate::core::{PackageId, SourceId, Summary};
7170
use crate::sources::registry::{LoadResponse, RegistryData, RegistryPackage, INDEX_V_MAX};
7271
use crate::util::interning::InternedString;
@@ -87,14 +86,14 @@ use std::task::{ready, Poll};
8786
/// This loop tries all possible combinations of switching hyphen and underscores to find the
8887
/// uncanonicalized one. As all stored inputs have the correct spelling, we start with the spelling
8988
/// as-provided.
90-
struct UncanonicalizedIter<'s> {
89+
pub struct UncanonicalizedIter<'s> {
9190
input: &'s str,
9291
num_hyphen_underscore: u32,
9392
hyphen_combination_num: u16,
9493
}
9594

9695
impl<'s> UncanonicalizedIter<'s> {
97-
fn new(input: &'s str) -> Self {
96+
pub fn new(input: &'s str) -> Self {
9897
let num_hyphen_underscore = input.chars().filter(|&c| c == '_' || c == '-').count() as u32;
9998
UncanonicalizedIter {
10099
input,
@@ -267,7 +266,7 @@ impl<'cfg> RegistryIndex<'cfg> {
267266
/// Returns the hash listed for a specified `PackageId`.
268267
pub fn hash(&mut self, pkg: PackageId, load: &mut dyn RegistryData) -> Poll<CargoResult<&str>> {
269268
let req = OptVersionReq::exact(pkg.version());
270-
let summary = self.summaries(pkg.name(), &req, load)?;
269+
let summary = self.summaries(&pkg.name(), &req, load)?;
271270
let summary = ready!(summary).next();
272271
Poll::Ready(Ok(summary
273272
.ok_or_else(|| internal(format!("no hash listed for {}", pkg)))?
@@ -285,7 +284,7 @@ impl<'cfg> RegistryIndex<'cfg> {
285284
/// though since this method is called quite a lot on null builds in Cargo.
286285
pub fn summaries<'a, 'b>(
287286
&'a mut self,
288-
name: InternedString,
287+
name: &str,
289288
req: &'b OptVersionReq,
290289
load: &mut dyn RegistryData,
291290
) -> Poll<CargoResult<impl Iterator<Item = &'a IndexSummary> + 'b>>
@@ -299,6 +298,7 @@ impl<'cfg> RegistryIndex<'cfg> {
299298
// has run previously this will parse a Cargo-specific cache file rather
300299
// than the registry itself. In effect this is intended to be a quite
301300
// cheap operation.
301+
let name = InternedString::new(name);
302302
let summaries = ready!(self.load_summaries(name, load)?);
303303

304304
// Iterate over our summaries, extract all relevant ones which match our
@@ -361,45 +361,17 @@ impl<'cfg> RegistryIndex<'cfg> {
361361
.flat_map(|c| c.to_lowercase())
362362
.collect::<String>();
363363

364-
let mut any_pending = false;
365-
// Attempt to handle misspellings by searching for a chain of related
366-
// names to the original `fs_name` name. Only return summaries
367-
// associated with the first hit, however. The resolver will later
368-
// reject any candidates that have the wrong name, and with this it'll
369-
// along the way produce helpful "did you mean?" suggestions.
370-
for (i, name_permutation) in UncanonicalizedIter::new(&fs_name).take(1024).enumerate() {
371-
let path = make_dep_path(&name_permutation, false);
372-
let summaries = Summaries::parse(
373-
root,
374-
&cache_root,
375-
path.as_ref(),
376-
self.source_id,
377-
load,
378-
self.config,
379-
)?;
380-
if summaries.is_pending() {
381-
if i == 0 {
382-
// If we have not heard back about the name as requested
383-
// then don't ask about other spellings yet.
384-
// This prevents us spamming all the variations in the
385-
// case where we have the correct spelling.
386-
return Poll::Pending;
387-
}
388-
any_pending = true;
389-
}
390-
if let Poll::Ready(Some(summaries)) = summaries {
391-
self.summaries_cache.insert(name, summaries);
392-
return Poll::Ready(Ok(self.summaries_cache.get_mut(&name).unwrap()));
393-
}
394-
}
395-
396-
if any_pending {
397-
return Poll::Pending;
398-
}
399-
400-
// If nothing was found then this crate doesn't exist, so just use an
401-
// empty `Summaries` list.
402-
self.summaries_cache.insert(name, Summaries::default());
364+
let path = make_dep_path(&fs_name, false);
365+
let summaries = ready!(Summaries::parse(
366+
root,
367+
&cache_root,
368+
path.as_ref(),
369+
self.source_id,
370+
load,
371+
self.config,
372+
))?
373+
.unwrap_or_default();
374+
self.summaries_cache.insert(name, summaries);
403375
Poll::Ready(Ok(self.summaries_cache.get_mut(&name).unwrap()))
404376
}
405377

@@ -410,7 +382,8 @@ impl<'cfg> RegistryIndex<'cfg> {
410382

411383
pub fn query_inner(
412384
&mut self,
413-
dep: &Dependency,
385+
name: &str,
386+
req: &OptVersionReq,
414387
load: &mut dyn RegistryData,
415388
yanked_whitelist: &HashSet<PackageId>,
416389
f: &mut dyn FnMut(Summary),
@@ -426,25 +399,28 @@ impl<'cfg> RegistryIndex<'cfg> {
426399
// then cargo will fail to download and an error message
427400
// indicating that the required dependency is unavailable while
428401
// offline will be displayed.
429-
if ready!(self.query_inner_with_online(dep, load, yanked_whitelist, f, false)?) > 0 {
402+
if ready!(self.query_inner_with_online(name, req, load, yanked_whitelist, f, false)?)
403+
> 0
404+
{
430405
return Poll::Ready(Ok(()));
431406
}
432407
}
433-
self.query_inner_with_online(dep, load, yanked_whitelist, f, true)
408+
self.query_inner_with_online(name, req, load, yanked_whitelist, f, true)
434409
.map_ok(|_| ())
435410
}
436411

437412
fn query_inner_with_online(
438413
&mut self,
439-
dep: &Dependency,
414+
name: &str,
415+
req: &OptVersionReq,
440416
load: &mut dyn RegistryData,
441417
yanked_whitelist: &HashSet<PackageId>,
442418
f: &mut dyn FnMut(Summary),
443419
online: bool,
444420
) -> Poll<CargoResult<usize>> {
445421
let source_id = self.source_id;
446422

447-
let summaries = ready!(self.summaries(dep.package_name(), dep.version_req(), load))?;
423+
let summaries = ready!(self.summaries(name, req, load))?;
448424

449425
let summaries = summaries
450426
// First filter summaries for `--offline`. If we're online then
@@ -469,7 +445,6 @@ impl<'cfg> RegistryIndex<'cfg> {
469445
// `<pkg>=<p_req>o-><f_req>` where `<pkg>` is the name of a crate on
470446
// this source, `<p_req>` is the version installed and `<f_req>` is the
471447
// version requested (argument to `--precise`).
472-
let name = dep.package_name().as_str();
473448
let precise = match source_id.precise() {
474449
Some(p) if p.starts_with(name) && p[name.len()..].starts_with('=') => {
475450
let mut vers = p[name.len() + 1..].splitn(2, "->");
@@ -481,7 +456,7 @@ impl<'cfg> RegistryIndex<'cfg> {
481456
};
482457
let summaries = summaries.filter(|s| match &precise {
483458
Some((current, requested)) => {
484-
if dep.version_req().matches(current) {
459+
if req.matches(current) {
485460
// Unfortunately crates.io allows versions to differ only
486461
// by build metadata. This shouldn't be allowed, but since
487462
// it is, this will honor it if requested. However, if not
@@ -521,7 +496,7 @@ impl<'cfg> RegistryIndex<'cfg> {
521496
) -> Poll<CargoResult<bool>> {
522497
let req = OptVersionReq::exact(pkg.version());
523498
let found = self
524-
.summaries(pkg.name(), &req, load)
499+
.summaries(&pkg.name(), &req, load)
525500
.map_ok(|mut p| p.any(|summary| summary.yanked));
526501
found
527502
}

‎src/cargo/sources/registry/mod.rs

+63-26
Original file line numberDiff line numberDiff line change
@@ -164,7 +164,7 @@ use std::collections::HashSet;
164164
use std::fs::{File, OpenOptions};
165165
use std::io::{self, Write};
166166
use std::path::{Path, PathBuf};
167-
use std::task::Poll;
167+
use std::task::{ready, Poll};
168168

169169
use anyhow::Context as _;
170170
use cargo_util::paths::{self, exclude_from_backups_and_indexing};
@@ -756,7 +756,7 @@ impl<'cfg> RegistrySource<'cfg> {
756756
let req = OptVersionReq::exact(package.version());
757757
let summary_with_cksum = self
758758
.index
759-
.summaries(package.name(), &req, &mut *self.ops)?
759+
.summaries(&package.name(), &req, &mut *self.ops)?
760760
.expect("a downloaded dep now pending!?")
761761
.map(|s| s.summary.clone())
762762
.next()
@@ -786,36 +786,73 @@ impl<'cfg> Source for RegistrySource<'cfg> {
786786
{
787787
debug!("attempting query without update");
788788
let mut called = false;
789-
let pend =
790-
self.index
791-
.query_inner(dep, &mut *self.ops, &self.yanked_whitelist, &mut |s| {
792-
if dep.matches(&s) {
793-
called = true;
794-
f(s);
795-
}
796-
})?;
797-
if pend.is_pending() {
798-
return Poll::Pending;
799-
}
789+
ready!(self.index.query_inner(
790+
&dep.package_name(),
791+
dep.version_req(),
792+
&mut *self.ops,
793+
&self.yanked_whitelist,
794+
&mut |s| {
795+
if dep.matches(&s) {
796+
called = true;
797+
f(s);
798+
}
799+
},
800+
))?;
800801
if called {
801-
return Poll::Ready(Ok(()));
802+
Poll::Ready(Ok(()))
802803
} else {
803804
debug!("falling back to an update");
804805
self.invalidate_cache();
805-
return Poll::Pending;
806+
Poll::Pending
806807
}
807-
}
808-
809-
self.index
810-
.query_inner(dep, &mut *self.ops, &self.yanked_whitelist, &mut |s| {
811-
let matched = match kind {
812-
QueryKind::Exact => dep.matches(&s),
813-
QueryKind::Fuzzy => true,
814-
};
815-
if matched {
816-
f(s);
808+
} else {
809+
let mut called = false;
810+
ready!(self.index.query_inner(
811+
&dep.package_name(),
812+
dep.version_req(),
813+
&mut *self.ops,
814+
&self.yanked_whitelist,
815+
&mut |s| {
816+
let matched = match kind {
817+
QueryKind::Exact => dep.matches(&s),
818+
QueryKind::Fuzzy => true,
819+
};
820+
if matched {
821+
f(s);
822+
called = true;
823+
}
824+
}
825+
))?;
826+
if called {
827+
return Poll::Ready(Ok(()));
828+
}
829+
let mut any_pending = false;
830+
if kind == QueryKind::Fuzzy {
831+
// Attempt to handle misspellings by searching for a chain of related
832+
// names to the original name. The resolver will later
833+
// reject any candidates that have the wrong name, and with this it'll
834+
// along the way produce helpful "did you mean?" suggestions.
835+
for name_permutation in
836+
index::UncanonicalizedIter::new(&dep.package_name()).take(1024)
837+
{
838+
any_pending |= self
839+
.index
840+
.query_inner(
841+
&name_permutation,
842+
dep.version_req(),
843+
&mut *self.ops,
844+
&self.yanked_whitelist,
845+
f,
846+
)?
847+
.is_pending();
817848
}
818-
})
849+
}
850+
if any_pending {
851+
Poll::Pending
852+
} else {
853+
Poll::Ready(Ok(()))
854+
}
855+
}
819856
}
820857

821858
fn supports_checksums(&self) -> bool {

‎tests/testsuite/publish.rs

+17
Original file line numberDiff line numberDiff line change
@@ -2566,6 +2566,8 @@ fn wait_for_first_publish_underscore() {
25662566
// Counter for number of tries before the package is "published"
25672567
let arc: Arc<Mutex<u32>> = Arc::new(Mutex::new(0));
25682568
let arc2 = arc.clone();
2569+
let misses = Arc::new(Mutex::new(Vec::new()));
2570+
let misses2 = misses.clone();
25692571

25702572
// Registry returns an invalid response.
25712573
let registry = registry::RegistryBuilder::new()
@@ -2580,6 +2582,14 @@ fn wait_for_first_publish_underscore() {
25802582
server.index(req)
25812583
}
25822584
})
2585+
.not_found_handler(move |req, _| {
2586+
misses.lock().unwrap().push(req.url.to_string());
2587+
Response {
2588+
body: b"not found".to_vec(),
2589+
code: 404,
2590+
headers: vec![],
2591+
}
2592+
})
25832593
.build();
25842594

25852595
let p = project()
@@ -2621,6 +2631,13 @@ You may press ctrl-c to skip waiting; the crate should be available shortly.
26212631
let lock = arc2.lock().unwrap();
26222632
assert_eq!(*lock, 2);
26232633
drop(lock);
2634+
{
2635+
let misses = misses2.lock().unwrap();
2636+
assert!(
2637+
misses.len() == 1,
2638+
"should only have 1 not found URL; instead found {misses:?}"
2639+
);
2640+
}
26242641

26252642
let p = project()
26262643
.file(

0 commit comments

Comments
 (0)
Please sign in to comment.