Changeset 2704


Ignore:
Timestamp:
01/17/25 15:18:37 (3 weeks ago)
Author:
hshepherd
Message:

Port across changes from the patching - Not yet tested

Location:
XIOS2/dev/hshepherd/reduce_output_log/src
Files:
6 edited

Legend:

Unmodified
Added
Removed
  • XIOS2/dev/hshepherd/reduce_output_log/src/client.cpp

    r2503 r2704  
    1313#include "buffer_client.hpp" 
    1414#include "string_tools.hpp" 
     15#include "timestats.hpp" 
    1516 
    1617namespace xios 
     
    261262    } 
    262263 
     264        std::vector<double> CClient::collate_timings(std::string timing_param) 
     265    { 
     266      int myrank, comm_size; 
     267      MPI_Comm_rank(intraComm, &myrank); 
     268      MPI_Comm_size(intraComm, &comm_size); 
     269 
     270      std::vector<double> collated_results; 
     271      MPI_Barrier(intraComm); 
     272      if (myrank == 0) { 
     273        double recv_val = 0.; 
     274        collated_results.push_back(CTimer::get(timing_param).getCumulatedTime()); 
     275        for (int i = 1; i < comm_size; i++) { 
     276          MPI_Recv(&recv_val, 1, MPI_DOUBLE, i, 0, intraComm, MPI_STATUS_IGNORE); 
     277          collated_results.push_back(recv_val); 
     278        } 
     279      } else { 
     280        double snd_val = CTimer::get(timing_param).getCumulatedTime(); 
     281        MPI_Send(&snd_val, 1, MPI_DOUBLE, 0, 0, intraComm); 
     282      } 
     283      MPI_Barrier(intraComm); 
     284      return collated_results; 
     285    } 
     286 
     287    void CClient::present_collated_timings(void) 
     288    { 
     289      std::vector<double> blocking_times = collate_timings("Blocking time"); 
     290      // we need to suspend the XIOS init/finalize timer briefly to gather the measurements 
     291      CTimer::get("XIOS init/finalize").suspend(); 
     292      std::vector<double> init_final_times = collate_timings("XIOS init/finalize"); 
     293      CTimer::get("XIOS init/finalize").resume(); 
     294      std::vector<double> ratio = percentage_ratio_vec_double(blocking_times, init_final_times); 
     295      if (rank_ == 0) { 
     296        int n_ranks; 
     297        MPI_Comm_size(intraComm, &n_ranks); 
     298         
     299        report(0) << " Performance metrics across all client ranks" << endl; 
     300        report(0) << "     Collating from " << n_ranks << " ranks" << endl; 
     301        write_summary_timings(blocking_times, "Time spent waiting for free buffer"); 
     302        write_summary_timings(ratio, "Waiting ratio (percentage)"); 
     303      } 
     304    } 
     305 
     306    void CClient::write_summary_timings(std::vector<double>& collated_results, 
     307                                        std::string results_label) 
     308    { 
     309      report(0) << "  " << results_label << " average " << calc_mean_double(collated_results) << endl; 
     310      report(0) << "  " << results_label << " std dev " << calc_std_double(collated_results) << endl; 
     311      report(0) << "  " << results_label << " min " << calc_min_double(collated_results) << endl; 
     312      report(0) << "  " << results_label << " max " << calc_max_double(collated_results) << endl; 
     313    } 
    263314 
    264315    void CClient::finalize(void) 
     
    266317      int rank ; 
    267318      int msg=0 ; 
     319 
     320      // Do our collated summary 
     321      present_collated_timings(); 
     322      MPI_Barrier(intraComm); 
    268323 
    269324      MPI_Comm_rank(intraComm,&rank) ; 
     
    294349       
    295350      info(20) << "Client side context is finalized"<<endl ; 
     351 
     352      if (CXios::reduceLogFiles) report(0) << "Performance summary from the first rank in this intraComm." << endl; 
     353 
    296354      report(0) <<" Performance report : Whole time from XIOS init and finalize: "<< CTimer::get("XIOS init/finalize").getCumulatedTime()<<" s"<<endl ; 
    297355      report(0) <<" Performance report : total time spent for XIOS : "<< CTimer::get("XIOS").getCumulatedTime()<<" s"<<endl ; 
     
    354412 
    355413    /*! 
     414    * \brief Check to see if we are on a rank for which we write an output file 
     415    * If reduced output is selected we only write from a single client 
     416    * rank 
     417    */ 
     418    bool CClient::writeLogFromRank() 
     419    { 
     420      bool do_write = false; 
     421      if (!CXios::reduceLogFiles) 
     422      { 
     423        return true; 
     424      } 
     425      if (rank_ == 0) 
     426      { 
     427        do_write = true; 
     428      } 
     429      return do_write; 
     430    } 
     431 
     432    /*! 
    356433    * \brief Open a file stream to write the info logs 
    357434    * Open a file stream with a specific file name suffix+rank 
     
    359436    * \param fileName [in] prototype file name 
    360437    */ 
    361     void CClient::openInfoStream(const StdString& fileName) 
    362     { 
    363       std::filebuf* fb = m_infoStream.rdbuf(); 
    364       openStream(fileName, ".out", fb); 
    365  
    366       info.write2File(fb); 
    367       report.write2File(fb); 
     438        void CClient::openInfoStream(const StdString& fileName) 
     439    { 
     440      if (writeLogFromRank()) 
     441      { 
     442        std::filebuf* fb = m_infoStream.rdbuf(); 
     443        openStream(fileName, ".out", fb); 
     444 
     445        info.write2File(fb); 
     446        report.write2File(fb); 
     447      } 
    368448    } 
    369449 
     
    371451    void CClient::openInfoStream() 
    372452    { 
    373       info.write2StdOut(); 
    374       report.write2StdOut(); 
     453      if (writeLogFromRank()) 
     454      { 
     455        info.write2StdOut(); 
     456        report.write2StdOut(); 
     457      } 
    375458    } 
    376459 
     
    378461    void CClient::closeInfoStream() 
    379462    { 
    380       if (m_infoStream.is_open()) m_infoStream.close(); 
    381     } 
     463      if (writeLogFromRank()) 
     464      { 
     465        if (m_infoStream.is_open()) m_infoStream.close(); 
     466      } 
     467    } 
     468 
    382469 
    383470    /*! 
     
    389476    void CClient::openErrorStream(const StdString& fileName) 
    390477    { 
    391       std::filebuf* fb = m_errorStream.rdbuf(); 
    392       openStream(fileName, ".err", fb); 
    393  
    394       error.write2File(fb); 
     478      if (writeLogFromRank()) 
     479      { 
     480        std::filebuf* fb = m_errorStream.rdbuf(); 
     481        openStream(fileName, ".err", fb); 
     482 
     483        error.write2File(fb); 
     484      } 
    395485    } 
    396486 
     
    398488    void CClient::openErrorStream() 
    399489    { 
    400       error.write2StdErr(); 
     490      if (writeLogFromRank()) 
     491      { 
     492        error.write2StdErr(); 
     493      } 
    401494    } 
    402495 
     
    404497    void CClient::closeErrorStream() 
    405498    { 
    406       if (m_errorStream.is_open()) m_errorStream.close(); 
     499      if (writeLogFromRank()) 
     500      { 
     501        if (m_errorStream.is_open()) m_errorStream.close(); 
     502      } 
    407503    } 
    408504} 
  • XIOS2/dev/hshepherd/reduce_output_log/src/client.hpp

    r1639 r2704  
    4646 
    4747        static void openStream(const StdString& fileName, const StdString& ext, std::filebuf* fb); 
     48 
     49      private: 
     50        static bool writeLogFromRank(); 
     51        static std::vector<double> collate_timings(std::string); 
     52        static void present_collated_timings(); 
     53        static void write_summary_timings(std::vector<double>&, std::string); 
    4854    }; 
    4955} 
  • XIOS2/dev/hshepherd/reduce_output_log/src/cxios.cpp

    r2503 r2704  
    4141  StdSize CXios::maxBufferSize = std::numeric_limits<int>::max() ; 
    4242  bool CXios::printLogs2Files; 
     43  bool CXios::reduceLogFiles; 
    4344  bool CXios::isOptPerformance = true; 
    4445  CRegistry* CXios::globalRegistry = 0; 
     
    8283    report.setLevel(getin<int>("info_level",50)); 
    8384    printLogs2Files=getin<bool>("print_file",false); 
    84  
     85    reduceLogFiles=getin<bool>("reduce_logging",false); 
    8586    xiosStack=getin<bool>("xios_stack",true) ; 
    8687    systemStack=getin<bool>("system_stack",false) ; 
  • XIOS2/dev/hshepherd/reduce_output_log/src/cxios.hpp

    r2503 r2704  
    4343 
    4444     static bool printLogs2Files; //!< Printing out logs into files 
     45     static bool reduceLogFiles;  //!<Reduce the log files to lead client, lead server and lead level2 server 
    4546     static bool usingOasis ;     //!< Using Oasis 
    4647     static bool usingServer ;    //!< Using server (server mode) 
  • XIOS2/dev/hshepherd/reduce_output_log/src/server.cpp

    r2503 r2704  
    1414#include "timer.hpp" 
    1515#include "mem_checker.hpp" 
     16#include "timestats.hpp" 
    1617#include "event_scheduler.hpp" 
    1718#include "string_tools.hpp" 
     
    3435    bool CServer::finished=false ; 
    3536    bool CServer::is_MPI_Initialized ; 
     37    bool CServer::writeLogFromRank; 
    3638    CEventScheduler* CServer::eventScheduler = 0; 
    3739 
     
    397399      if (rank==0) isRoot=true; 
    398400      else isRoot=false; 
     401 
     402      writeLogFromRank = determineWriteLogFromRank(); 
    399403       
    400404      eventScheduler = new CEventScheduler(intraComm) ; 
    401405    } 
    402406 
     407    std::vector<double> CServer::collate_timings(std::string timing_param) 
     408    { 
     409      int myrank, comm_size; 
     410      MPI_Comm_rank(intraComm, &myrank); 
     411      MPI_Comm_size(intraComm, &comm_size); 
     412 
     413      std::vector<double> collated_results; 
     414      MPI_Barrier(intraComm); 
     415      if (myrank == 0) { 
     416        double recv_val = 0.; 
     417        collated_results.push_back(CTimer::get(timing_param).getCumulatedTime()); 
     418        for (int i = 1; i < comm_size; i++) { 
     419          MPI_Recv(&recv_val, 1, MPI_DOUBLE, i, 0, intraComm, MPI_STATUS_IGNORE); 
     420          collated_results.push_back(recv_val); 
     421        } 
     422      } else { 
     423        double snd_val = CTimer::get(timing_param).getCumulatedTime(); 
     424        MPI_Send(&snd_val, 1, MPI_DOUBLE, 0, 0, intraComm); 
     425      } 
     426      MPI_Barrier(intraComm); 
     427      return collated_results; 
     428    } 
     429 
     430    void CServer::present_collated_timings(void) 
     431    { 
     432      std::vector<double> processing_times = collate_timings("Process events"); 
     433      std::vector<double> xios_server_times = collate_timings("XIOS server"); 
     434      std::vector<double> ratio = percentage_ratio_vec_double(processing_times, xios_server_times); 
     435      // Only rank 0 (getRank() == 0) writes the summary; writeLogFromRank is not used here because it is true on every rank when reduce_logging is off 
     436      if (getRank() == 0) { 
     437        int n_ranks; 
     438        MPI_Comm_size(intraComm, &n_ranks); 
     439        report(0) << " Performance metrics across the intraComm communicator for this rank." << endl; 
     440        report(0) << "     There are " << n_ranks << " ranks in this intraComm" << endl; 
     441        write_summary_timings(processing_times, "Processing events"); 
     442        write_summary_timings(ratio, "Ratio (Percentage)"); 
     443      } 
     444    } 
     445       
     446 
     447    void CServer::write_summary_timings(std::vector<double>& collated_results, 
     448                                        std::string results_label) 
     449    { 
     450      report(0) << "  " << results_label << " average " << calc_mean_double(collated_results) << endl; 
     451      report(0) << "  " << results_label << " std dev " << calc_std_double(collated_results) << endl; 
     452      report(0) << "  " << results_label << " min " << calc_min_double(collated_results) << endl; 
     453      report(0) << "  " << results_label << " max " << calc_max_double(collated_results) << endl; 
     454    } 
     455   
     456 
    403457    void CServer::finalize(void) 
    404458    { 
     459      present_collated_timings(); 
     460      MPI_Barrier(intraComm); 
     461 
    405462      CTimer::get("XIOS").suspend() ; 
    406463      
     
    430487        else MPI_Finalize() ; 
    431488      } 
     489 
     490      if (CXios::reduceLogFiles) report(0) << "Performance summary from the first rank in this intraComm." << endl; 
     491 
    432492      report(0)<<"Performance report : Time spent for XIOS : "<<CTimer::get("XIOS server").getCumulatedTime()<<endl  ; 
    433493      report(0)<<"Performance report : Time spent in processing events : "<<CTimer::get("Process events").getCumulatedTime()<<endl  ; 
     
    907967    } 
    908968 
     969    bool CServer::determineWriteLogFromRank() 
     970    { 
     971      // Write from rank zero of each intracomm, which will be the rank of 
     972      // the lead level one and two servers 
     973      if (!CXios::reduceLogFiles) return true; 
     974      if (getRank() == 0) return true; 
     975      return false; 
     976    } 
     977 
    909978    /*! 
    910979    * \brief Open a file stream to write the info logs 
     
    915984    void CServer::openInfoStream(const StdString& fileName) 
    916985    { 
    917       std::filebuf* fb = m_infoStream.rdbuf(); 
    918       openStream(fileName, ".out", fb); 
    919  
    920       info.write2File(fb); 
    921       report.write2File(fb); 
     986      if (writeLogFromRank) 
     987      { 
     988        std::filebuf* fb = m_infoStream.rdbuf(); 
     989        openStream(fileName, ".out", fb); 
     990 
     991        info.write2File(fb); 
     992        report.write2File(fb); 
     993      } 
    922994    } 
    923995 
     
    925997    void CServer::openInfoStream() 
    926998    { 
    927       info.write2StdOut(); 
    928       report.write2StdOut(); 
     999      if (writeLogFromRank) 
     1000      { 
     1001        info.write2StdOut(); 
     1002        report.write2StdOut(); 
     1003      } 
    9291004    } 
    9301005 
     
    9321007    void CServer::closeInfoStream() 
    9331008    { 
    934       if (m_infoStream.is_open()) m_infoStream.close(); 
     1009      if (writeLogFromRank) 
     1010      { 
     1011        if (m_infoStream.is_open()) m_infoStream.close(); 
     1012      } 
    9351013    } 
    9361014 
     
    9431021    void CServer::openErrorStream(const StdString& fileName) 
    9441022    { 
    945       std::filebuf* fb = m_errorStream.rdbuf(); 
    946       openStream(fileName, ".err", fb); 
    947  
    948       error.write2File(fb); 
     1023      if (writeLogFromRank) 
     1024      { 
     1025        std::filebuf* fb = m_errorStream.rdbuf(); 
     1026        openStream(fileName, ".err", fb); 
     1027 
     1028        error.write2File(fb); 
     1029      } 
    9491030    } 
    9501031 
     
    9521033    void CServer::openErrorStream() 
    9531034    { 
    954       error.write2StdErr(); 
     1035      if (writeLogFromRank) 
     1036      { 
     1037        error.write2StdErr(); 
     1038      } 
    9551039    } 
    9561040 
     
    9581042    void CServer::closeErrorStream() 
    9591043    { 
    960       if (m_errorStream.is_open()) m_errorStream.close(); 
     1044      if (writeLogFromRank) 
     1045      { 
     1046        if (m_errorStream.is_open()) m_errorStream.close(); 
     1047      } 
    9611048    } 
    9621049} 
  • XIOS2/dev/hshepherd/reduce_output_log/src/server.hpp

    r1639 r2704  
    7474        static StdOFStream m_infoStream; 
    7575        static StdOFStream m_errorStream; 
     76        static bool writeLogFromRank; 
    7677        static void openStream(const StdString& fileName, const StdString& ext, std::filebuf* fb); 
     78        static std::vector<double> collate_timings(std::string); 
     79        static void present_collated_timings(); 
     80        static void write_summary_timings(std::vector<double>&, std::string); 
     81        static bool determineWriteLogFromRank(); 
    7782    }; 
    7883} 
Note: See TracChangeset for help on using the changeset viewer.