#include #include #include #include #include #include #include #include #include using namespace std; const int MAX_RESULT_DOCUMENT_COUNT = 5; string ReadLine() { string s; getline(cin, s); return s; } int ReadLineWithNumber() { int result; cin >> result; ReadLine(); return result; } vector SplitIntoWords(const string& text) { vector words; string word; for (const char c : text) { if (c == ' ') { if (!word.empty()) { words.push_back(word); word.clear(); } } else { word += c; } } if (!word.empty()) { words.push_back(word); } return words; } struct Document { int id; double relevance; int rating; }; class SearchServer { public: void SetStopWords(const string& text) { for (const string& word : SplitIntoWords(text)) { stop_words_.insert(word); } } void AddDocument(int document_id, const string& document, const vector ratings) { ++document_count_; const vector words = SplitIntoWordsNoStop(document); const double inv_word_count = 1.0 / words.size(); for (const string& word : words) { word_to_document_freqs_[word][document_id] += inv_word_count; } document_ratings_[document_id] = ComputeAverageRating(ratings); } vector FindTopDocuments(const string& raw_query) const { const Query query = ParseQuery(raw_query); auto matched_documents = FindAllDocuments(query); sort(matched_documents.begin(), matched_documents.end(), [](const Document& lhs, const Document& rhs) { return lhs.relevance > rhs.relevance; }); if (matched_documents.size() > MAX_RESULT_DOCUMENT_COUNT) { matched_documents.resize(MAX_RESULT_DOCUMENT_COUNT); } return matched_documents; } private: int document_count_ = 0; set stop_words_; map> word_to_document_freqs_; map document_ratings_; bool IsStopWord(const string& word) const { return stop_words_.count(word) > 0; } vector SplitIntoWordsNoStop(const string& text) const { vector words; for (const string& word : SplitIntoWords(text)) { if (!IsStopWord(word)) { words.push_back(word); } } return words; } struct QueryWord { string data; bool is_minus; bool is_stop; }; QueryWord ParseQueryWord(string text) const { bool is_minus = false; // Word shouldn't be empty if (text[0] == '-') { is_minus = true; text = text.substr(1); } return {text, is_minus, IsStopWord(text)}; } struct Query { set plus_words; set minus_words; }; Query ParseQuery(const string& text) const { Query query; for (const string& word : SplitIntoWords(text)) { const QueryWord query_word = ParseQueryWord(word); if (!query_word.is_stop) { if (query_word.is_minus) { query.minus_words.insert(query_word.data); } else { query.plus_words.insert(query_word.data); } } } return query; } // Existence required double ComputeWordInverseDocumentFreq(const string& word) const { return log(document_count_ * 1.0 / word_to_document_freqs_.at(word).size()); } vector FindAllDocuments(const Query& query) const { map document_to_relevance; for (const string& word : query.plus_words) { if (word_to_document_freqs_.count(word) == 0) { continue; } const double inverse_document_freq = ComputeWordInverseDocumentFreq(word); for (const auto &[document_id, term_freq] : word_to_document_freqs_.at(word)) { document_to_relevance[document_id] += term_freq * inverse_document_freq; } } for (const string& word : query.minus_words) { if (word_to_document_freqs_.count(word) == 0) { continue; } for (const auto &[document_id, _] : word_to_document_freqs_.at(word)) { document_to_relevance.erase(document_id); } } vector matched_documents; for (const auto &[document_id, relevance] : document_to_relevance) { int rating = document_ratings_.at(document_id); matched_documents.push_back({document_id, relevance, rating}); } return matched_documents; } int ComputeAverageRating(const vector& ratings) { if (ratings.empty()) { return 0; } else { vector result = ratings; auto iter = result.cbegin(); result.erase(iter); int size = result.size(); return accumulate(result.begin(), result.end(), 0) / size; } } }; SearchServer CreateSearchServer() { SearchServer search_server; search_server.SetStopWords(ReadLine()); const int document_count = ReadLineWithNumber(); for (int document_id = 0; document_id < document_count; ++document_id) { string document = ReadLine(); vector ratings_string = SplitIntoWords(ReadLine()); vector ratings; transform(ratings_string.begin(), ratings_string.end(), back_inserter(ratings), [&](string s) { return stoi(s); }); search_server.AddDocument(document_id, document, ratings); } return search_server; } int main() { const SearchServer search_server = CreateSearchServer(); const string query = ReadLine(); for (auto [document_id, relevance, rating] : search_server.FindTopDocuments(query)) { cout << "{ document_id = "s << document_id << ", "s << "relevance = "s << relevance << ", "s << "rating = "s << rating << " }"s << endl; } }