C++中字符串标记化(分词)详细指南

2021年3月13日17:18:48 发表评论 747 次浏览

本文概述

对字符串进行标记化(tokenize)是指按照某些定界符把字符串分割成若干标记(token)。标记化字符串的方法有很多, 本文将介绍其中的四种:

使用stringstream

stringstream 将字符串对象与流相关联, 使你可以像读取流一样读取字符串。

以下是C ++实现:

C ++

// Tokenizing a string using stringstream
#include <bits/stdc++.h>
  
using namespace std;
  
// Splits a sentence on the space character by reading it through a
// std::stringstream with std::getline, then prints one token per line.
int main()
{
    const std::string line = "lsbin is a must try";

    // Wrap the text in a stream so getline() can consume it piece by piece.
    std::stringstream check1(line);

    // Tokens collected in order of appearance.
    std::vector<std::string> tokens;

    // With ' ' as the delimiter, each getline() call yields the text up to
    // the next space (or to the end of the stream).
    std::string intermediate;
    while (std::getline(check1, intermediate, ' '))
        tokens.push_back(intermediate);

    // Range-for avoids comparing a signed int index against the unsigned
    // value returned by size().
    for (const std::string& token : tokens)
        std::cout << token << '\n';
}

输出如下

lsbin
is
a
must
try

使用strtok()

// Splits str[] according to the given delimiters and returns the
// next token. It needs to be called in a loop to get all tokens.
// It returns NULL when there are no more tokens.
char * strtok(char str[], const char *delims);

以下是C ++实现:

C ++

// C/C++ program for splitting a string
// using strtok()
#include <stdio.h>
#include <string.h>
  
// Prints every '-'-separated piece of the buffer, one per line.
int main()
{
    char buffer[] = "Geeks-for-Geeks";

    // The first strtok() call receives the buffer; each later call passes
    // NULL to continue scanning the same buffer. The loop stops as soon as
    // strtok() returns NULL, i.e. when no tokens are left.
    for (char *piece = strtok(buffer, "-"); piece != NULL; piece = strtok(NULL, "-"))
    {
        printf("%s\n", piece);
    }

    return 0;
}

输出如下

Geeks
for
Geeks

strtok()的另一个示例:
 

C

// C code to demonstrate working of
// strtok
#include <string.h>
#include <stdio.h>
  
// Driver function
// Splits a sentence on '-' and prints each piece on its own line,
// prefixed with a space.
int main()
{
    // Text to split; strtok() is called directly on this buffer.
    char text[100] = " Geeks - for - geeks - Contribute";

    // Delimiter set handed to every strtok() call.
    const char delim[4] = "-";

    // Fetch the first token from the buffer, then keep asking for the
    // next one (null first argument) until strtok() reports none remain.
    for (char *piece = strtok(text, delim); piece != 0; piece = strtok(0, delim)) {
        printf(" %s\n", piece);
    }

    return 0;
}

输出如下

Geeks 
  for 
  geeks 
  Contribute

使用strtok_r()

与C中的strtok()函数一样, strtok_r()同样用于将字符串解析为一系列标记(token)。strtok_r()是strtok()的可重入版本。

有两种方法可以调用strtok_r()

// Reentrant variant of strtok(): splits str on the characters in delim.
// The third argument, saveptr, is a pointer to a char * variable that
// strtok_r() uses internally to maintain context between successive
// calls that parse the same string.
char *strtok_r(char *str, const char *delim, char **saveptr);

下面是一个简单的C ++程序, 用于显示strtok_r()的用法:

CPP

// C/C++ program to demonstrate working of strtok_r()
// by splitting string based on space character.
#include<stdio.h>
#include<string.h>
  
// Splits a sentence on spaces with strtok_r() and prints one token per line.
int main()
{
    char str[] = "Geeks for Geeks";

    // Scanning context owned by strtok_r(); its contents are meant to be
    // used only by strtok_r() itself, so it is never passed back as the
    // string argument.
    char *saveptr = NULL;

    // Documented calling pattern: pass the string on the first call and
    // NULL on every later call so the scan resumes from saveptr.
    for (char *token = strtok_r(str, " ", &saveptr);
         token != NULL;
         token = strtok_r(NULL, " ", &saveptr))
    {
        printf("%s\n", token);
    }

    return 0;
}

输出如下

Geeks
for
Geeks

使用std :: sregex_token_iterator

这种方法基于正则表达式匹配进行标记化, 更适用于需要同时处理多个定界符的场景。

下面是一个简单的C ++程序, 用于显示std :: sregex_token_iterator的用法:

C ++

// CPP program for above approach
#include <iostream>
#include <regex>
#include <string>
#include <vector>
  
/**
  * @brief Split the given string into tokens wherever the regex matches,
  *        dropping empty tokens.
  *
  * @param str Text to tokenize.
  * @param re  Regular expression that matches the delimiter(s).
  * @return std::vector<std::string> The non-empty tokens, in order of
  *         appearance.
  */
std::vector<std::string> tokenize(
                      const std::string& str, const std::regex& re)
{
     std::vector<std::string> tokenized;

     // Submatch index -1 makes the iterator yield the text *between*
     // regex matches instead of the matches themselves.
     const std::sregex_token_iterator last;
     for (std::sregex_token_iterator it{ str.begin(), str.end(), re, -1 };
          it != last; ++it)
     {
         // Leading or adjacent delimiters (and an empty input) produce
         // empty pieces; skip them here rather than erasing afterwards.
         if (it->length() != 0)
             tokenized.push_back(it->str());
     }

     return tokenized;
}
  
// Driver Code
int main()
{
     // The literal must stay on a single line: an ordinary string literal
     // cannot contain a raw line break.
     const std::string str = "Break string a, spaces, and, commas";

     // Raw string literal: no whitespace is allowed between R and the
     // opening quote. The pattern matches one or more whitespace
     // characters, '|', ',' or ' '.
     const std::regex re(R"([\s|, ]+)");

     // Function Call
     const std::vector<std::string> tokenized = tokenize(str, re);

     // Iterate by const reference to avoid copying each token, and end
     // lines with '\n' so the stream is not flushed on every token.
     for (const std::string& token : tokenized)
         std::cout << token << '\n';
     return 0;
}

输出如下

Break
string
a
spaces
and
commas

木子山

发表评论

:?: :razz: :sad: :evil: :!: :smile: :oops: :grin: :eek: :shock: :???: :cool: :lol: :mad: :twisted: :roll: :wink: :idea: :arrow: :neutral: :cry: :mrgreen: