// fig08_15.cpp // Reading the Titanic dataset from a CSV file, then analyzing it. #include #include #include #include #include #include #include #include #include int main() { // load Titanic dataset; treat missing age values as NaN rapidcsv::Document titanic{"titanic.csv", rapidcsv::LabelParams{}, rapidcsv::SeparatorParams{}, rapidcsv::ConverterParams{true}}; // GetColumn returns column's data as a vector of the appropriate type auto survived{titanic.GetColumn("survived")}; auto sex{titanic.GetColumn("sex")}; auto age{titanic.GetColumn("age")}; auto pclass{titanic.GetColumn("pclass")}; // display first 5 rows std::cout << fmt::format("First five rows:\n{:<10}{:<8}{:<6}{}\n", "survived", "sex", "age", "class"); for (size_t i{0}; i < 5; ++i) { std::cout << fmt::format("{:<10}{:<8}{:<6.1f}{}\n", survived.at(i), sex.at(i), age.at(i), pclass.at(i)); } // display last 5 rows std::cout << fmt::format("\nLast five rows:\n{:<10}{:<8}{:<6}{}\n", "survived", "sex", "age", "class"); const auto count{titanic.GetRowCount()}; for (size_t i{count - 5}; i < count; ++i) { std::cout << fmt::format("{:<10}{:<8}{:<6.1f}{}\n", survived.at(i), sex.at(i), age.at(i), pclass.at(i)); } // use C++20 ranges to eliminate missing values from age column auto removeNaN{ age | std::views::filter([](const auto& x) {return !isnan(x);})}; std::vector cleanAge{ std::begin(removeNaN), std::end(removeNaN)}; // descriptive statistics for cleaned ages column std::sort(std::begin(cleanAge), std::end(cleanAge)); size_t size{cleanAge.size()}; double median{}; if (size % 2 == 0) { // find median value for even number of items median = (cleanAge.at(size / 2 - 1) + cleanAge.at(size / 2)) / 2; } else { // find median value for odd number of items median = cleanAge.at(size / 2); } std::cout << "\nDescriptive statistics for the age column:\n" << fmt::format("Passengers with age data: {}\n", size) << fmt::format("Average age: {:.2f}\n", std::accumulate( std::begin(cleanAge), std::end(cleanAge), 0.0) / size) << fmt::format("Minimum age: {:.2f}\n", cleanAge.front()) << fmt::format("Maximum age: {:.2f}\n", cleanAge.back()) << fmt::format("Median age: {:.2f}\n", median); // passenger counts by class auto countClass{ [](const auto& column, const int classNumber) { return std::ranges::count_if(column, [classNumber](int x) {return classNumber == x;}); } }; constexpr int firstClass{1}; constexpr int secondClass{2}; constexpr int thirdClass{3}; const auto firstCount{countClass(pclass, firstClass)}; const auto secondCount{countClass(pclass, secondClass)}; const auto thirdCount{countClass(pclass, thirdClass)}; std::cout << "\nPassenger counts by class:\n" << fmt::format("1st: {}\n2nd: {}\n3rd: {}\n\n", firstCount, secondCount, thirdCount); // percentage of people who survived const auto survivorCount{ std::ranges::count_if(survived, [](auto x) {return x;}) }; std::cout << fmt::format("Survived count: {}\nDied count: {}\n", survivorCount, survived.size() - survivorCount); std::cout << fmt::format("Percent who survived: {:.2f}%\n\n", 100.0 * survivorCount / survived.size()); // count who survived by male/female, 1st/2nd/3rd class int survivingMen{0}; int survivingWomen{0}; int surviving1st{0}; int surviving2nd{0}; int surviving3rd{0}; for (size_t i{0}; i < survived.size(); ++i) { if (survived.at(i)) { sex.at(i) == "female" ? ++survivingWomen : ++survivingMen; if (firstClass == pclass.at(i)) { ++surviving1st; } else if (secondClass == pclass.at(i)) { ++surviving2nd; } else { // third class ++surviving3rd; } } } // percentages who survived by male/female, 1st/2nd/3rd class std::cout << fmt::format("Female survivor percentage: {:.2f}%\n", 100.0 * survivingWomen / survivorCount) << fmt::format("Male survivor percentage: {:.2f}%\n\n", 100.0 * survivingMen / survivorCount) << fmt::format("1st class survivor percentage: {:.2f}%\n", 100.0 * surviving1st / survivorCount) << fmt::format("2nd class survivor percentage: {:.2f}%\n", 100.0 * surviving2nd / survivorCount) << fmt::format("3rd class survivor percentage: {:.2f}%\n", 100.0 * surviving3rd / survivorCount); } /************************************************************************** * (C) Copyright 1992-2022 by Deitel & Associates, Inc. and * * Pearson Education, Inc. All Rights Reserved. * * * * DISCLAIMER: The authors and publisher of this book have used their * * best efforts in preparing the book. These efforts include the * * development, research, and testing of the theories and programs * * to determine their effectiveness. The authors and publisher make * * no warranty of any kind, expressed or implied, with regard to these * * programs or to the documentation contained in these books. The authors * * and publisher shall not be liable in any event for incidental or * * consequential damages in connection with, or arising out of, the * * furnishing, performance, or use of these programs. * **************************************************************************/