From: csagan5 <32685696+csagan5@users.noreply.github.com> Date: Sun, 6 Mar 2022 18:55:58 +0100 Subject: OpenSearch: miscellaneous Fix upstream bug with recently added engines prematurely discarded because they have no last-visit timestamp Fix upstream bug with visited engines visit time not updated Allow adding search engines in incognito mode Allow using search engine URLs with non-empty paths Add verbose logging License: GPL-3.0-only - https://spdx.org/licenses/GPL-3.0-only.html --- .../settings/SearchEngineAdapter.java | 4 +- .../search_engine_tab_helper.cc | 51 ++++++++++++++----- .../renderer/chrome_render_frame_observer.cc | 2 + .../search_engines/template_url_fetcher.cc | 24 +++++++-- .../search_engines/template_url_parser.cc | 2 +- .../search_engines/template_url_service.h | 8 +-- 6 files changed, 67 insertions(+), 24 deletions(-) diff --git a/chrome/browser/search_engines/android/java/src/org/chromium/chrome/browser/search_engines/settings/SearchEngineAdapter.java b/chrome/browser/search_engines/android/java/src/org/chromium/chrome/browser/search_engines/settings/SearchEngineAdapter.java --- a/chrome/browser/search_engines/android/java/src/org/chromium/chrome/browser/search_engines/settings/SearchEngineAdapter.java +++ b/chrome/browser/search_engines/android/java/src/org/chromium/chrome/browser/search_engines/settings/SearchEngineAdapter.java @@ -245,7 +245,9 @@ public class SearchEngineAdapter extends BaseAdapter continue; } if (recentEngineNum < MAX_RECENT_ENGINE_NUM - && templateUrl.getLastVisitedTime() > displayTime) { + // newly added search engines have never been visited + && (templateUrl.getLastVisitedTime() == 0 || + templateUrl.getLastVisitedTime() > displayTime)) { recentEngineNum++; } else { iterator.remove(); diff --git a/chrome/browser/ui/search_engines/search_engine_tab_helper.cc b/chrome/browser/ui/search_engines/search_engine_tab_helper.cc --- a/chrome/browser/ui/search_engines/search_engine_tab_helper.cc +++ b/chrome/browser/ui/search_engines/search_engine_tab_helper.cc @@ -6,6 +6,7 @@ #include +#include "base/logging.h" #include "base/metrics/histogram_macros.h" #include "chrome/browser/favicon/favicon_utils.h" #include "chrome/browser/profiles/profile.h" @@ -46,8 +47,10 @@ void SearchEngineTabHelper::BindOpenSearchDescriptionDocumentHandler( mojo::PendingReceiver receiver) { // Bind only for outermost main frames. - if (rfh->GetParentOrOuterDocument()) + if (rfh->GetParentOrOuterDocument()) { + LOG(INFO) << "OpenSearch: not on main frame"; return; + } auto* web_contents = content::WebContents::FromRenderFrameHost(rfh); if (!web_contents) @@ -73,16 +76,20 @@ std::u16string SearchEngineTabHelper::GenerateKeywordFromNavigationEntry( NavigationEntry* entry) { // Don't autogenerate keywords for pages that are the result of form // submissions. - if (IsFormSubmit(entry)) + if (IsFormSubmit(entry)) { + LOG(INFO) << "OpenSearch: cannot generate keyword for a form submission for entry " << entry->GetURL(); return std::u16string(); + } // We want to use the user typed URL if available since that represents what // the user typed to get here, and fall back on the regular URL if not. GURL url = entry->GetUserTypedURL(); if (!url.is_valid()) { url = entry->GetURL(); - if (!url.is_valid()) + if (!url.is_valid()) { + LOG(INFO) << "OpenSearch: user-typed/entry URL are invalid for entry " << entry->GetURL(); return std::u16string(); + } } // Don't autogenerate keywords for referrers that @@ -90,10 +97,10 @@ std::u16string SearchEngineTabHelper::GenerateKeywordFromNavigationEntry( // b) have a path. // // If we relax the path constraint, we need to be sure to sanitize the path - // elements and update AutocompletePopup to look for keywords using the path. + // elements and update TemplateURL to look for keywords using the path. // See http://b/issue?id=863583. - if (!(url.SchemeIs(url::kHttpScheme) || url.SchemeIs(url::kHttpsScheme)) || - (url.path().length() > 1)) { + if (!(url.SchemeIs(url::kHttpScheme) || url.SchemeIs(url::kHttpsScheme))) { + LOG(INFO) << "OpenSearch: invalid scheme for entry " << entry->GetURL(); return std::u16string(); } @@ -121,15 +128,18 @@ void SearchEngineTabHelper::PageHasOpenSearchDescriptionDocument( // When |page_url| has file: scheme, this method doesn't work because of // http://b/issue?id=863583. For that reason, this doesn't check and allow // urls referring to osdd urls with same schemes. - if (!osdd_url.is_valid() || !osdd_url.SchemeIsHTTPOrHTTPS()) + if (!osdd_url.is_valid() || !osdd_url.SchemeIsHTTPOrHTTPS()) { + LOG(INFO) << "OpenSearch: not a valid OSDD URL " << osdd_url; return; + } Profile* profile = Profile::FromBrowserContext(web_contents()->GetBrowserContext()); if (page_url != web_contents()->GetLastCommittedURL() || - !TemplateURLFetcherFactory::GetForProfile(profile) || - profile->IsOffTheRecord()) + !TemplateURLFetcherFactory::GetForProfile(profile)) { + LOG(INFO) << "OpenSearch: page URL mismatch on page " << page_url; return; + } // If the current page is a form submit, find the last page that was not a // form submit and use its url to generate the keyword from. @@ -139,8 +149,10 @@ void SearchEngineTabHelper::PageHasOpenSearchDescriptionDocument( (index > 0) && IsFormSubmit(entry); entry = controller.GetEntryAtIndex(index)) --index; - if (!entry || IsFormSubmit(entry)) + if (!entry || IsFormSubmit(entry)) { + LOG(INFO) << "OpenSearch: cannot find form submission for entry " << entry->GetURL(); return; + } // Autogenerate a keyword for the autodetected case; in the other cases we'll // generate a keyword later after fetching the OSDD. @@ -148,6 +160,12 @@ void SearchEngineTabHelper::PageHasOpenSearchDescriptionDocument( if (keyword.empty()) return; + std::u16string page_keyword = TemplateURL::GenerateKeyword(page_url); + if (page_keyword != keyword) { + LOG(INFO) << "OpenSearch: keyword mismatch for entry " << entry->GetURL(); + return; + } + auto* frame = web_contents()->GetPrimaryMainFrame(); mojo::Remote url_loader_factory; frame->CreateNetworkServiceDefaultFactory( @@ -155,6 +173,7 @@ void SearchEngineTabHelper::PageHasOpenSearchDescriptionDocument( // Download the OpenSearch description document. If this is successful, a // new keyword will be created when done. + // NOTE: for search pages under the same domain only 1 keyword is supported TemplateURLFetcherFactory::GetForProfile(profile)->ScheduleDownload( keyword, osdd_url, entry->GetFavicon().url, frame->GetLastCommittedOrigin(), url_loader_factory.get(), @@ -196,11 +215,18 @@ void SearchEngineTabHelper::GenerateKeywordIfNecessary( if (last_index <= 0) return; - std::u16string keyword(GenerateKeywordFromNavigationEntry( - controller.GetEntryAtIndex(last_index - 1))); + NavigationEntry* entry = controller.GetEntryAtIndex(last_index - 1); + std::u16string keyword(GenerateKeywordFromNavigationEntry(entry)); if (keyword.empty()) return; + GURL url = handle->GetSearchableFormURL(); + std::u16string page_keyword = TemplateURL::GenerateKeyword(url); + if (page_keyword != keyword) { + LOG(INFO) << "OpenSearch: GenerateKeywordIfNecessary(): keyword mismatch for entry " << entry->GetURL(); + return; + } + TemplateURLService* url_service = TemplateURLServiceFactory::GetForProfile(profile); if (!url_service) @@ -211,7 +237,6 @@ void SearchEngineTabHelper::GenerateKeywordIfNecessary( return; } - GURL url = handle->GetSearchableFormURL(); if (!url_service->CanAddAutogeneratedKeyword(keyword, url)) return; diff --git a/chrome/renderer/chrome_render_frame_observer.cc b/chrome/renderer/chrome_render_frame_observer.cc --- a/chrome/renderer/chrome_render_frame_observer.cc +++ b/chrome/renderer/chrome_render_frame_observer.cc @@ -14,6 +14,7 @@ #include "base/command_line.h" #include "base/functional/bind.h" +#include "base/logging.h" #include "base/metrics/histogram_macros.h" #include "base/no_destructor.h" #include "base/strings/string_number_conversions.h" @@ -258,6 +259,7 @@ void ChromeRenderFrameObserver::DidFinishLoad() { GURL osdd_url = frame->GetDocument().OpenSearchDescriptionURL(); if (!osdd_url.is_empty()) { + DLOG(INFO) << "OpenSearch: found OSDD URL: " << osdd_url; mojo::Remote osdd_handler; render_frame()->GetBrowserInterfaceBroker()->GetInterface( diff --git a/components/search_engines/template_url_fetcher.cc b/components/search_engines/template_url_fetcher.cc --- a/components/search_engines/template_url_fetcher.cc +++ b/components/search_engines/template_url_fetcher.cc @@ -259,18 +259,32 @@ void TemplateURLFetcher::ScheduleDownload( return; } - const TemplateURL* template_url = + TemplateURL* template_url = template_url_service_->GetTemplateURLForKeyword(keyword); - if (template_url && (!template_url->safe_for_autoreplace() || - template_url->originating_url() == osdd_url)) - return; + if (template_url) { + if (!template_url->safe_for_autoreplace()) { + LOG(INFO) << "OpenSearch: OSDD URL not safe for autoreplace: " << osdd_url; + return; + } + if (template_url->originating_url() == osdd_url) { + // Either there is a user created TemplateURL for this keyword, or the + // keyword has the same OSDD url and we've parsed it. + DLOG(INFO) << "OpenSearch: OSDD URL was already parsed: " << osdd_url; + // always update the visit timestamp + template_url_service_->UpdateTemplateURLVisitTime(template_url); + return; + } + } // Make sure we aren't already downloading this request. for (const auto& request : requests_) { - if ((request->url() == osdd_url) || (request->keyword() == keyword)) + if ((request->url() == osdd_url) || (request->keyword() == keyword)) { + LOG(INFO) << "OpenSearch: already downloading OSDD URL: " << osdd_url; return; + } } + LOG(INFO) << "OpenSearch: getting " << osdd_url; requests_.push_back(std::make_unique( this, keyword, osdd_url, favicon_url, initiator, url_loader_factory, render_frame_id, request_id)); diff --git a/components/search_engines/template_url_parser.cc b/components/search_engines/template_url_parser.cc --- a/components/search_engines/template_url_parser.cc +++ b/components/search_engines/template_url_parser.cc @@ -173,7 +173,7 @@ void SafeTemplateURLParser::OnXmlParseComplete( std::move(callback_).Run([&]() -> std::unique_ptr { ASSIGN_OR_RETURN(const base::Value root, std::move(value_or_error), [](std::string error) -> std::unique_ptr { - DLOG(ERROR) + LOG(ERROR) << "Failed to parse XML: " << std::move(error); return nullptr; }); diff --git a/components/search_engines/template_url_service.h b/components/search_engines/template_url_service.h --- a/components/search_engines/template_url_service.h +++ b/components/search_engines/template_url_service.h @@ -277,7 +277,10 @@ class TemplateURLService : public WebDataServiceConsumer, void UpdateProviderFavicons(const GURL& potential_search_url, const GURL& favicon_url); - // Return true if the given |url| can be made the default. This returns false + // Updates the last_visited time of |url| to the current time. + void UpdateTemplateURLVisitTime(TemplateURL* url); + + // Return true if the given |url| can be made the default. This returns false // regardless of |url| if the default search provider is managed by policy or // controlled by an extension. bool CanMakeDefault(const TemplateURL* url) const; @@ -643,9 +646,6 @@ class TemplateURLService : public WebDataServiceConsumer, // SetKeywordSearchTermsForURL is invoked. void UpdateKeywordSearchTermsForURL(const URLVisitedDetails& details); - // Updates the last_visited time of |url| to the current time. - void UpdateTemplateURLVisitTime(TemplateURL* url); - // If necessary, generates a visit for the site http:// + t_url.keyword(). void AddTabToSearchVisit(const TemplateURL& t_url); -- 2.25.1