Regex wycinanie tekstu
Ostatnio zmodyfikowano 2017-10-15 17:27
Breakermind Temat założony przez niniejszego użytkownika |
Regex wycinanie tekstu » 2017-10-15 16:57:59 Chciałbym pobrać z tekstu tylko te linie, które zawierają Cc:, CC:, cC: lub cc: #include <string> #include <sys/types.h> #include <regex>
/// Returns the first "Cc:" header line found in `s`.
///
/// The header name is matched case-insensitively (Cc:, CC:, cC:, cc:) and only
/// at the beginning of a line, so "Bcc:" lines are never reported. The returned
/// text is the line itself, without its trailing CR/LF.
///
/// @param s  Text whose lines are separated by "\n" or "\r\n".
/// @return   The first Cc line, or an empty string when there is none.
std::string findCc(const std::string& s)
{
    // '^' only matches at the very start of the input (no multiline mode), hence
    // the explicit "\n" alternative for lines further down. "[^\r\n]*" stops right
    // before the line terminator, so the capture never contains CR or LF.
    static const std::regex rgx("(?:^|\n)(cc:[^\r\n]*)", std::regex_constants::icase);

    std::smatch match;
    if (!std::regex_search(s, match, rgx)) {
        return std::string();
    }

    const std::string line = match[1].str();
    // Diagnostic trace; printed only when a line was found.
    std::cout << "match From: " << line << '\n';
    return line;
}
// Demo driver: calls findCc() on a sample CRLF-separated header block
// (From / To / Cc / Bcc) and prints the returned text.
// Handlers run from most to least specific: std::runtime_error first (this is
// where std::regex_error, thrown for a malformed pattern, ends up because it
// derives from std::runtime_error), then any other std::exception, reported
// with __FILE__ / __FUNCTION__ / __LINE__, and finally a catch-all.
try { const string s = "From:<hello@qflas.pl>\r\nTo:<hello@qflas.pl>\r\nCc:<llo@bobobo.po>,\" Kazik \" <hello@ddd.ddd>,\" Maxiu\" <hehe@ooo.hhh>\r\nBcc:<hello@qflas.pl>"; cout << findCc( s ) << endl; } catch( const std::runtime_error & ee ) { std::cerr << "Runtime error: " << ee.what() << std::endl; } catch( std::exception & e ) { cout << "# ERR: MainErrorException in " << __FILE__; cout << "(" << __FUNCTION__ << ") on line " << __LINE__ << endl; cout << "# ERR: " << e.what() << endl; } catch(...) { std::cerr << "Unknown failure occurred. Possible memory corruption" << std::endl; }
Lecz dostaję błąd – gdzie on jest i jak go poprawić? |
|
Breakermind Temat założony przez niniejszego użytkownika |
// » 2017-10-15 17:27:59 — I have since switched to a different function:

/// Collects every "Cc:" header line contained in `text`.
///
/// The header name is matched case-insensitively and only at the beginning of a
/// line, so "Bcc:" lines are not reported. Each result is one complete line
/// without its trailing CR/LF, in order of appearance; the first element is the
/// first Cc line.
///
/// @param text  Text whose lines are separated by "\n" or "\r\n".
/// @return      One entry per Cc line; empty when the text has none.
std::vector<std::string> findCc(std::string text)
{
    // '^' only matches at the very start of the input, hence the explicit "\n"
    // alternative. The line itself is capture group 1.
    static const std::regex pattern("(?:^|\n)(cc:[^\r\n]*)", std::regex_constants::icase);

    std::vector<std::string> lines;
    const std::sregex_iterator last;
    for (std::sregex_iterator it(text.cbegin(), text.cend(), pattern); it != last; ++it) {
        lines.push_back((*it)[1].str());
    }
    return lines;
}
// Sample input: a CRLF-separated header block with From, To, Cc and Bcc lines.
string s = "From:<hello@qflas.pl>\r\nTo:<hello@qflas.pl>\r\nCc:<llo@bobobo.pl>,\" Kazik \" <hello@ddd.ddd>,\" Maxiu \" <hehe@ooo.hhh>\r\nBcc:<hello@qflas.pl>";
// Print the first entry returned by findCc(); vector::at() throws
// std::out_of_range if the result is empty.
vector < string > aa = findCc( s ); cout << aa.at( 0 );
// Extracting e-mail addresses from text:

/// Returns the distinct e-mail addresses found in `text`, sorted ascending.
///
/// An address is whatever the pattern below accepts: word characters,
/// optionally chained with '-', '.' or '_' before the '@', and at least two
/// word-character labels joined by '-' or '.' after it.
///
/// @param text  Arbitrary text to scan.
/// @return      Sorted addresses with duplicates removed; empty when none match.
std::vector<std::string> searchEmails(std::string text)
{
    static const std::regex pattern("(((\\w+([-\\._])+)+|())\\w+@(\\w+([-\\.])+)+\\w+)");

    std::vector<std::string> emails;
    const std::sregex_iterator last;
    for (std::sregex_iterator it(text.cbegin(), text.cend(), pattern); it != last; ++it) {
        // Only the whole match is wanted; the capture groups exist for grouping.
        emails.push_back(it->str());
    }

    std::sort(emails.begin(), emails.end());
    emails.erase(std::unique(emails.begin(), emails.end()), emails.end());
    return emails;
}
// Or like this:

/// Finds every e-mail address in `s` and returns the regex sub-matches of each.
///
/// Every inner vector describes one match: element 0 is the complete address,
/// the remaining elements are the pattern's capture groups in order. The outer
/// vector is sorted and free of duplicates.
///
/// @param s  Arbitrary text to scan.
/// @return   One inner vector per distinct match; empty when nothing matches.
std::vector<std::vector<std::string>> findEmails(const std::string& s)
{
    static const std::regex rx("((((\\w+([-\\._])+)+|())\\w+@(\\w+([-\\.])+)+\\w+))",
                               std::regex_constants::icase);

    std::vector<std::vector<std::string>> captured_groups;
    const std::sregex_iterator last;
    for (std::sregex_iterator it(s.cbegin(), s.cend(), rx); it != last; ++it) {
        // The iterator already exposes all sub-matches of the current match,
        // so there is no need to search the matched text a second time.
        std::vector<std::string> subgroups;
        subgroups.reserve(it->size());
        for (std::size_t g = 0; g < it->size(); ++g) {
            subgroups.push_back((*it)[g].str());
        }
        captured_groups.push_back(subgroups);
    }

    std::sort(captured_groups.begin(), captured_groups.end());
    captured_groups.erase(std::unique(captured_groups.begin(), captured_groups.end()),
                          captured_groups.end());
    return captured_groups;
}
/// Demo: extracts the e-mail addresses from a sample string with findEmails()
/// and prints each address on its own line.
int main()
{
    const std::string em = "From; <ema-il@d-dd.ddd>, hel.lo@aaaa.dd-dd.ppp,bo_om@ee-ee.ddd";
    const std::vector<std::vector<std::string>> ema = findEmails(em);

    for (const auto& groups : ema) {
        // Element 0 of every group is the complete address; the rest are capture
        // groups. Indexing the inner vector with the outer index would print the
        // wrong capture or throw once there are more results than groups.
        if (!groups.empty()) {
            std::cout << groups.front() << '\n';
        }
    }
    return 0;
}
// Cutting text out with a regex:

/// Removes every "Bcc:" header line from `s`.
///
/// The header name is matched case-insensitively and only at the beginning of a
/// line, so "bcc:" appearing inside another line is left alone. A removed line
/// takes one line terminator with it, so no empty line is left behind (an empty
/// line would end the header block of an e-mail message).
///
/// @param s  Text whose lines are separated by "\n" or "\r\n".
/// @return   Copy of `s` without its Bcc lines.
std::string removeBcc(std::string s)
{
    static const std::regex reg(
        // Bcc line(s) at the very start of the text: drop them together with the
        // terminator that follows. Consecutive lines are consumed in one match,
        // because '^' cannot match again once the search has moved on.
        "^(?:bcc:[^\r\n]*(?:\r?\n|$))+"
        // Bcc line anywhere else: drop it together with the terminator before it.
        "|\r?\nbcc:[^\r\n]*",
        std::regex_constants::icase);

    return std::regex_replace(s, reg, "");
}
|
|
« 1 » |