🎯 Рекомендуемые коллекции
Сбалансированные коллекции примеров кода из различных категорий, которые вы можете исследовать
Обработка Строк Windows - Примеры C++
Полные примеры обработки строк в C++ для платформы Windows, включая конкатенацию, регулярные выражения и операции замены
💻 Конкатенация и Разделение Строк cpp
🟡 intermediate
⭐⭐
Продвинутые техники конкатенации строк и разделения по разделителям с оптимизацией производительности
⏱️ 25 min
🏷️ cpp, string processing, algorithms, windows
Prerequisites:
C++ string basics, STL algorithms, Regular expressions
#include <algorithm>
#include <cctype>
#include <chrono>
#include <codecvt>
#include <iomanip>
#include <iostream>
#include <locale>
#include <map>
#include <memory>
#include <regex>
#include <sstream>
#include <string>
#include <vector>
// 1. Basic string concatenation methods
//
// Builds the same "Hello World!" text four different ways and prints each
// result so the techniques can be compared side by side.
void BasicConcatenation() {
    std::cout << "=== Basic String Concatenation ===" << std::endl;

    const std::string first = "Hello";
    const std::string second = "World";

    // Method 1: chain operator+
    const std::string viaPlus = first + " " + second + "!";
    std::cout << "Using + operator: " << viaPlus << std::endl;

    // Method 2: grow one string in place with chained append() calls
    std::string viaAppend;
    viaAppend.append(first).append(" ").append(second).append("!");
    std::cout << "Using append(): " << viaAppend << std::endl;

    // Method 3: stream the pieces into an ostringstream
    std::ostringstream stream;
    stream << first << " " << second << "!";
    std::cout << "Using stringstream: " << stream.str() << std::endl;

    // Method 4: copy one character at a time with push_back()
    const std::string source = first + " " + second + "!";
    std::string viaPushBack;
    for (std::string::size_type i = 0; i < source.size(); ++i) {
        viaPushBack.push_back(source[i]);
    }
    std::cout << "Using push_back: " << viaPushBack << std::endl;
}
// 2. Performance comparison of concatenation methods
//
// Appends ADD_STRING to BASE_STRING ITERATIONS times using four techniques,
// prints the elapsed time of each in microseconds, and finishes with a summary
// in which the "(fastest)" marker is attached to whichever method actually
// measured fastest in this run (ties are all marked).
void PerformanceComparison() {
    std::cout << "\n=== Concatenation Performance Comparison ===" << std::endl;
    const int ITERATIONS = 10000;
    const std::string BASE_STRING = "Hello";
    const std::string ADD_STRING = " World";
    using Clock = std::chrono::high_resolution_clock;

    // Method 1: Using + operator (builds a brand-new string every iteration)
    auto start = Clock::now();
    std::string result1 = BASE_STRING;
    for (int i = 0; i < ITERATIONS; i++) {
        result1 = result1 + ADD_STRING;
    }
    auto end = Clock::now();
    const auto duration1 = std::chrono::duration_cast<std::chrono::microseconds>(end - start);
    std::cout << "Using + operator: " << duration1.count() << " μs" << std::endl;

    // Method 2: Using append()
    start = Clock::now();
    std::string result2 = BASE_STRING;
    for (int i = 0; i < ITERATIONS; i++) {
        result2.append(ADD_STRING);
    }
    end = Clock::now();
    const auto duration2 = std::chrono::duration_cast<std::chrono::microseconds>(end - start);
    std::cout << "Using append(): " << duration2.count() << " μs" << std::endl;

    // Method 3: Using stringstream
    start = Clock::now();
    std::ostringstream oss;
    oss << BASE_STRING;
    for (int i = 0; i < ITERATIONS; i++) {
        oss << ADD_STRING;
    }
    std::string result3 = oss.str();
    end = Clock::now();
    const auto duration3 = std::chrono::duration_cast<std::chrono::microseconds>(end - start);
    std::cout << "Using stringstream: " << duration3.count() << " μs" << std::endl;

    // Method 4: Using reserve() + append()
    start = Clock::now();
    std::string result4;
    result4.reserve(BASE_STRING.length() + ADD_STRING.length() * ITERATIONS);
    // Append (rather than assign) so the buffer reserved above is the one used.
    result4.append(BASE_STRING);
    for (int i = 0; i < ITERATIONS; i++) {
        result4.append(ADD_STRING);
    }
    end = Clock::now();
    const auto duration4 = std::chrono::duration_cast<std::chrono::microseconds>(end - start);
    std::cout << "Using reserve() + append(): " << duration4.count() << " μs" << std::endl;

    // Performance summary: mark the measured winner instead of assuming one.
    const auto fastest = std::min({duration1, duration2, duration3, duration4});
    const auto marker = [&fastest](const std::chrono::microseconds& duration) {
        return duration == fastest ? " (fastest)" : "";
    };
    std::cout << "\nPerformance summary (μs):" << std::endl;
    std::cout << "+ operator: " << duration1.count() << marker(duration1) << std::endl;
    std::cout << "append(): " << duration2.count() << marker(duration2) << std::endl;
    std::cout << "stringstream: " << duration3.count() << marker(duration3) << std::endl;
    std::cout << "reserve + append: " << duration4.count() << marker(duration4) << std::endl;
}
// FormatString() is defined after its first use in this file, so it has to be
// declared before AdvancedConcatenation() can call it.
std::string FormatString(const std::string& templateStr, const std::vector<std::string>& args);

// 3. Advanced concatenation with formatting
//
// Shows three formatting techniques: stream manipulators for numeric
// precision, "{N}" placeholder substitution through FormatString(), and
// fixed-width column alignment with setw/left/right.
void AdvancedConcatenation() {
    std::cout << "\n=== Advanced Concatenation with Formatting ===" << std::endl;

    // Format integers, floats, and other types
    const int age = 25;
    const double height = 1.75;
    const std::string name = "Alice";
    std::ostringstream formatted;
    formatted << "Name: " << name << ", Age: " << age << ", Height: "
              << std::fixed << std::setprecision(2) << height << "m";
    std::cout << "Formatted string: " << formatted.str() << std::endl;

    // Using format-style placeholders. std::to_string(double) always prints
    // six decimals, so the height appears as "1.750000" here.
    const std::string templateStr = "User {0} is {1} years old and {2}m tall";
    const std::string formattedStr =
        FormatString(templateStr, {name, std::to_string(age), std::to_string(height)});
    std::cout << "Custom formatted: " << formattedStr << std::endl;

    // Padding and alignment (names and scores are parallel arrays)
    const std::vector<std::string> names = {"Alice", "Bob", "Charlie", "David"};
    const std::vector<int> scores = {95, 87, 92, 78};
    std::cout << "\nFormatted table:" << std::endl;
    for (size_t i = 0; i < names.size(); i++) {
        std::ostringstream row;
        row << std::setw(10) << std::left << names[i]
            << " | Score: " << std::setw(3) << std::right << scores[i];
        std::cout << row.str() << std::endl;
    }
}
// Helper function for custom string formatting.
//
// Returns a copy of templateStr in which every "{N}" placeholder (N written in
// decimal without leading zeros, N < args.size()) is replaced by args[N].
// The template is scanned once from left to right, so:
//   * every occurrence of a placeholder is replaced, not just the first one;
//   * text inserted from an argument is never re-scanned, which means an
//     argument that itself contains "{1}" is emitted literally.
// Anything that is not a valid in-range placeholder is copied unchanged.
std::string FormatString(const std::string& templateStr, const std::vector<std::string>& args) {
    std::string result;
    result.reserve(templateStr.size());

    std::string::size_type pos = 0;
    while (pos < templateStr.size()) {
        const std::string::size_type open = templateStr.find('{', pos);
        if (open == std::string::npos) {
            result.append(templateStr, pos, std::string::npos);
            break;
        }
        result.append(templateStr, pos, open - pos);

        // Parse the decimal index that follows '{'. Once the index is known
        // to be out of range we stop accumulating to avoid overflow.
        std::string::size_type cursor = open + 1;
        std::string::size_type index = 0;
        bool inRange = true;
        while (cursor < templateStr.size() &&
               templateStr[cursor] >= '0' && templateStr[cursor] <= '9') {
            if (inRange) {
                index = index * 10 + static_cast<std::string::size_type>(templateStr[cursor] - '0');
                inRange = index < args.size();
            }
            ++cursor;
        }

        const std::string::size_type digitCount = cursor - (open + 1);
        const bool hasLeadingZero = digitCount > 1 && templateStr[open + 1] == '0';
        const bool isClosed = cursor < templateStr.size() && templateStr[cursor] == '}';

        if (digitCount > 0 && !hasLeadingZero && isClosed && inRange) {
            result += args[index];
            pos = cursor + 1;
        } else {
            // Not a placeholder: keep the brace and continue right after it.
            result += '{';
            pos = open + 1;
        }
    }
    return result;
}
// The splitting helpers are defined after this function in the file, so they
// are declared here to make the calls below well-formed.
std::vector<std::string> SplitString(const std::string& str, const std::string& delimiter);
std::vector<std::string> SplitStringKeepEmpty(const std::string& str, const std::string& delimiter);

// 4. String splitting by different delimiters
//
// Demonstrates delimiter-based splitting (dropping empty tokens), a two-level
// "pair;pair" / "key=value" split, whitespace tokenising via stringstream, and
// a split that preserves empty tokens.
void StringSplitting() {
    std::cout << "\n=== String Splitting ===" << std::endl;

    // Basic comma-separated splitting
    const std::string csv = "apple,banana,cherry,date,elderberry";
    const std::vector<std::string> fruits = SplitString(csv, ",");
    std::cout << "CSV splitting - Original: " << csv << std::endl;
    std::cout << "Results: ";
    for (const auto& fruit : fruits) {
        std::cout << "[" << fruit << "] ";
    }
    std::cout << std::endl;

    // Split on the space character only (tabs/newlines are not delimiters here)
    const std::string sentence = "The quick brown fox jumps over the lazy dog";
    const std::vector<std::string> words = SplitString(sentence, " ");
    std::cout << "\nWhitespace splitting - Original: " << sentence << std::endl;
    std::cout << "Words (" << words.size() << "): ";
    for (const auto& word : words) {
        std::cout << word << " ";
    }
    std::cout << std::endl;

    // Two-level split: ';' separates pairs, '=' separates key from value
    const std::string data = "key1=value1;key2=value2;key3=value3";
    const std::vector<std::string> pairs = SplitString(data, ";");
    std::cout << "\nMultiple delimiter splitting - Original: " << data << std::endl;
    for (const auto& pair : pairs) {
        std::cout << "Pair: " << pair << std::endl;
        // Further split by '='
        const std::vector<std::string> keyValue = SplitString(pair, "=");
        if (keyValue.size() == 2) {
            std::cout << " Key: " << keyValue[0] << ", Value: " << keyValue[1] << std::endl;
        }
    }

    // Split using stringstream: operator>> skips any run of whitespace
    std::stringstream ss(sentence);
    std::vector<std::string> streamWords;
    std::string word;
    while (ss >> word) {
        streamWords.push_back(word);
    }
    std::cout << "\nStringStream splitting - Words (" << streamWords.size() << "): ";
    for (const auto& w : streamWords) {
        std::cout << w << " ";
    }
    std::cout << std::endl;

    // Split with empty token handling
    const std::string path = "/usr//local/bin/";
    const std::vector<std::string> pathParts = SplitStringKeepEmpty(path, "/");
    std::cout << "\nPath splitting with empty tokens: " << path << std::endl;
    for (size_t i = 0; i < pathParts.size(); i++) {
        std::cout << " Part " << i << ": [" << pathParts[i] << "]" << std::endl;
    }
}
// Basic string splitting function.
//
// Splits str on every occurrence of delimiter and returns the non-empty
// tokens in order (leading, trailing and consecutive delimiters produce no
// empty entries). An empty delimiter cannot split anything: the whole string
// is returned as a single token (or nothing if str is empty). Without that
// guard find("") would match at the same position forever.
std::vector<std::string> SplitString(const std::string& str, const std::string& delimiter) {
    std::vector<std::string> result;
    if (delimiter.empty()) {
        if (!str.empty()) {
            result.push_back(str);
        }
        return result;
    }

    std::string::size_type start = 0;
    for (std::string::size_type end = str.find(delimiter);
         end != std::string::npos;
         end = str.find(delimiter, start)) {
        if (end > start) {  // skip empty tokens
            result.emplace_back(str, start, end - start);
        }
        start = end + delimiter.length();
    }
    if (start < str.length()) {
        result.emplace_back(str, start, std::string::npos);
    }
    return result;
}
// String splitting function that keeps empty tokens.
//
// Splits str on every occurrence of delimiter and returns all tokens,
// including empty ones produced by leading, trailing or adjacent delimiters;
// the result therefore always has (number of delimiters + 1) entries.
// An empty delimiter yields the whole input as the single token instead of
// looping forever on a zero-length match.
std::vector<std::string> SplitStringKeepEmpty(const std::string& str, const std::string& delimiter) {
    std::vector<std::string> result;
    if (delimiter.empty()) {
        result.push_back(str);
        return result;
    }

    std::string::size_type start = 0;
    for (std::string::size_type end = str.find(delimiter);
         end != std::string::npos;
         end = str.find(delimiter, start)) {
        result.emplace_back(str, start, end - start);
        start = end + delimiter.length();
    }
    result.emplace_back(str, start, std::string::npos);
    return result;
}
// These helpers are defined after this function in the file, so they are
// declared here to make the calls below well-formed.
std::vector<std::string> SplitByWhitespace(const std::string& str);
std::vector<std::string> SplitByMultipleDelimiters(const std::string& str,
                                                   const std::vector<std::string>& delimiters);
std::vector<std::string> SplitStringN(const std::string& str, const std::string& delimiter, int maxSplits);

// 5. Advanced splitting with multiple delimiters
//
// Demonstrates splitting on arbitrary whitespace, on a set of alternative
// delimiters, and with an upper bound on the number of splits performed.
void AdvancedSplitting() {
    std::cout << "\n=== Advanced Splitting ===" << std::endl;

    // Split by any whitespace character
    const std::string text = "Hello\tWorld\nThis is a test";
    const std::vector<std::string> whitespaceWords = SplitByWhitespace(text);
    std::cout << "Original: '" << text << "'" << std::endl;
    std::cout << "Split by whitespace: ";
    for (const auto& word : whitespaceWords) {
        std::cout << "[" << word << "] ";
    }
    std::cout << std::endl;

    // Split by multiple specific delimiters
    const std::string data = "name=John,age=25;city=New York|country=USA";
    const std::vector<std::string> delimiters = {",", ";", "|"};
    const std::vector<std::string> parts = SplitByMultipleDelimiters(data, delimiters);
    std::cout << "\nMulti-delimiter splitting:" << std::endl;
    std::cout << "Original: " << data << std::endl;
    for (const auto& part : parts) {
        std::cout << " [" << part << "]" << std::endl;
    }

    // Split and limit number of splits. The third argument bounds the number
    // of splits, so 3 splits can yield up to 4 parts; the label says so.
    const std::string limitedText = "one,two,three,four,five";
    const std::vector<std::string> limitedParts = SplitStringN(limitedText, ",", 3);
    std::cout << "\nLimited splitting (max 3 splits):" << std::endl;
    std::cout << "Original: " << limitedText << std::endl;
    for (const auto& part : limitedParts) {
        std::cout << " [" << part << "]" << std::endl;
    }
}
// Split by any whitespace.
//
// Tokenises str with stream extraction: runs of whitespace separate tokens
// and never produce empty entries.
std::vector<std::string> SplitByWhitespace(const std::string& str) {
    std::istringstream input(str);
    std::vector<std::string> tokens;
    for (std::string token; input >> token;) {
        tokens.push_back(token);
    }
    return tokens;
}
// Split by multiple delimiters.
//
// Splits str with the first delimiter, then re-splits every resulting piece
// with each remaining delimiter in turn (all through SplitString, so empty
// pieces are dropped). With no delimiters the input is returned unchanged as
// the only element.
std::vector<std::string> SplitByMultipleDelimiters(const std::string& str,
                                                   const std::vector<std::string>& delimiters) {
    if (delimiters.empty()) {
        return {str};
    }

    // Use the first delimiter for the initial split
    std::vector<std::string> parts = SplitString(str, delimiters.front());

    // For each remaining delimiter, split all current parts
    for (size_t i = 1; i < delimiters.size(); ++i) {
        std::vector<std::string> refined;
        refined.reserve(parts.size());
        for (const auto& part : parts) {
            const std::vector<std::string> pieces = SplitString(part, delimiters[i]);
            refined.insert(refined.end(), pieces.begin(), pieces.end());
        }
        // Swap instead of copy-assigning: the previous pass is no longer needed.
        parts.swap(refined);
    }
    return parts;
}
// Split string with maximum number of splits.
//
// Performs at most maxSplits splits on delimiter, keeping empty tokens, and
// appends whatever remains of str as the final element. The result therefore
// has at most maxSplits + 1 entries. A non-positive maxSplits or an empty
// delimiter returns the whole input as a single element (an empty delimiter
// would otherwise match at position 0 repeatedly and emit empty tokens).
std::vector<std::string> SplitStringN(const std::string& str, const std::string& delimiter, int maxSplits) {
    std::vector<std::string> result;
    if (delimiter.empty()) {
        result.push_back(str);
        return result;
    }

    std::string::size_type start = 0;
    std::string::size_type end = str.find(delimiter);
    for (int splits = 0; end != std::string::npos && splits < maxSplits; ++splits) {
        result.emplace_back(str, start, end - start);
        start = end + delimiter.length();
        end = str.find(delimiter, start);
    }

    // Add the remaining part
    result.emplace_back(str, start, std::string::npos);
    return result;
}
// The join helpers are defined after this function in the file, so they are
// declared here to make the calls below well-formed.
std::string JoinStrings(const std::vector<std::string>& strings, const std::string& delimiter);
std::string JoinWithPrefixSuffix(const std::vector<std::string>& strings,
                                 const std::string& prefix, const std::string& suffix,
                                 const std::string& delimiter);
std::string JoinSkipEmpty(const std::vector<std::string>& strings, const std::string& delimiter);
std::string JoinWithLimit(const std::vector<std::string>& strings,
                          const std::string& delimiter, int maxElements,
                          const std::string& suffix);

// 6. Join operations (reverse of splitting)
//
// Demonstrates plain joining, wrapping each element in a prefix/suffix,
// skipping empty elements, and truncating the output after a fixed number of
// elements.
void StringJoining() {
    std::cout << "\n=== String Joining ===" << std::endl;
    const std::vector<std::string> words = {"The", "quick", "brown", "fox"};
    const std::vector<std::string> numbers = {"1", "2", "3", "4", "5"};

    // Join with space
    const std::string sentence = JoinStrings(words, " ");
    std::cout << "Joined with space: " << sentence << std::endl;

    // Join with comma and space
    const std::string csv = JoinStrings(words, ", ");
    std::cout << "CSV style: " << csv << std::endl;

    // Join with prefix and suffix
    const std::string bracketed = JoinWithPrefixSuffix(numbers, "[", "]", " + ");
    std::cout << "Math expression: " << bracketed << std::endl;

    // Conditional joining (skip empty strings)
    const std::vector<std::string> mixed = {"Hello", "", "World", "", "C++"};
    const std::string cleanJoined = JoinSkipEmpty(mixed, " ");
    std::cout << "Skip empty: " << cleanJoined << std::endl;

    // Join with limits
    const std::vector<std::string> manyWords = {"These", "are", "many", "words", "to", "join"};
    const std::string limited = JoinWithLimit(manyWords, ", ", 3, "...");
    std::cout << "Limited join: " << limited << std::endl;
}
// Basic join function.
//
// Concatenates the elements of strings in order, inserting delimiter between
// consecutive elements. Returns an empty string for an empty input.
// The exact output size is computed up front so the result is allocated once,
// and pieces are appended separately to avoid a temporary per element.
std::string JoinStrings(const std::vector<std::string>& strings, const std::string& delimiter) {
    if (strings.empty()) {
        return std::string();
    }

    std::string::size_type total = delimiter.size() * (strings.size() - 1);
    for (const auto& piece : strings) {
        total += piece.size();
    }

    std::string result;
    result.reserve(total);
    result += strings.front();
    for (std::vector<std::string>::size_type i = 1; i < strings.size(); ++i) {
        result += delimiter;
        result += strings[i];
    }
    return result;
}
// Join with prefix and suffix around each element.
//
// Emits prefix + element + suffix for every element, separated by delimiter.
// An empty input produces an empty string.
std::string JoinWithPrefixSuffix(const std::vector<std::string>& strings,
                                 const std::string& prefix, const std::string& suffix,
                                 const std::string& delimiter) {
    std::string joined;
    bool needsDelimiter = false;
    for (const std::string& item : strings) {
        if (needsDelimiter) {
            joined += delimiter;
        }
        joined += prefix;
        joined += item;
        joined += suffix;
        needsDelimiter = true;
    }
    return joined;
}
// Join skipping empty strings.
//
// Filters out the empty elements of strings and joins what is left with
// delimiter via JoinStrings().
std::string JoinSkipEmpty(const std::vector<std::string>& strings, const std::string& delimiter) {
    std::vector<std::string> kept;
    for (std::vector<std::string>::size_type i = 0; i < strings.size(); ++i) {
        if (strings[i].empty()) {
            continue;
        }
        kept.push_back(strings[i]);
    }
    return JoinStrings(kept, delimiter);
}
// Join with limit on number of elements.
//
// Joins at most maxElements leading elements of strings with delimiter. When
// elements were left out, suffix is appended (preceded by delimiter if at
// least one element was emitted) to signal the truncation.
// A non-positive maxElements emits no elements at all, so a non-empty input
// yields just suffix. An empty input always yields an empty string.
std::string JoinWithLimit(const std::vector<std::string>& strings,
                          const std::string& delimiter, int maxElements,
                          const std::string& suffix) {
    if (strings.empty()) {
        return std::string();
    }

    // Clamp before converting: a negative int cast to an unsigned type would
    // turn into a huge limit.
    const std::vector<std::string>::size_type limit =
        maxElements > 0 ? static_cast<std::vector<std::string>::size_type>(maxElements) : 0;
    const std::vector<std::string>::size_type shown = std::min(limit, strings.size());

    std::string result;
    for (std::vector<std::string>::size_type i = 0; i < shown; ++i) {
        if (i > 0) {
            result += delimiter;
        }
        result += strings[i];
    }

    if (shown < strings.size()) {
        if (shown > 0) {
            result += delimiter;
        }
        result += suffix;
    }
    return result;
}
// The extraction helpers are defined after this function in the file, so they
// are declared here to make the calls below well-formed.
std::vector<double> ExtractNumbers(const std::string& str);
std::vector<std::string> ExtractWordsLongerThan(const std::string& str, int minLength);
std::vector<std::string> ExtractEmails(const std::string& str);
std::map<std::string, std::string> ParseKeyValuePairs(const std::string& str,
                                                      const std::string& pairDelimiter,
                                                      const std::string& kvDelimiter);

// 7. String parsing and extraction
//
// Demonstrates pulling numbers, long words and e-mail addresses out of free
// text with regular expressions, and parsing "key=value; ..." configuration
// text into a map.
void StringParsing() {
    std::cout << "\n=== String Parsing and Extraction ===" << std::endl;

    // Extract numbers from string
    const std::string text1 = "The temperature is 23.5 degrees and humidity is 45%";
    const std::vector<double> numbers = ExtractNumbers(text1);
    std::cout << "Original: " << text1 << std::endl;
    std::cout << "Extracted numbers: ";
    for (double num : numbers) {
        std::cout << num << " ";
    }
    std::cout << std::endl;

    // Extract words with specific criteria
    const std::string text2 = "Hello world! This is a C++ string processing example.";
    const std::vector<std::string> longWords = ExtractWordsLongerThan(text2, 4);
    std::cout << "\nOriginal: " << text2 << std::endl;
    std::cout << "Words longer than 4 characters: ";
    for (const auto& word : longWords) {
        std::cout << word << " ";
    }
    std::cout << std::endl;

    // Extract email addresses (sample addresses on reserved example domains)
    const std::string emailText = "Contact us at support@example.com or sales@example.org";
    const std::vector<std::string> emails = ExtractEmails(emailText);
    std::cout << "\nEmail extraction:" << std::endl;
    std::cout << "Original: " << emailText << std::endl;
    for (const auto& email : emails) {
        std::cout << "Found email: " << email << std::endl;
    }

    // Parse key-value pairs
    const std::string configText = "name=John Doe; age=30; city=New York; country=USA";
    const std::map<std::string, std::string> config = ParseKeyValuePairs(configText, ";", "=");
    std::cout << "\nKey-value parsing:" << std::endl;
    std::cout << "Original: " << configText << std::endl;
    for (const auto& [key, value] : config) {
        std::cout << key << " = " << value << std::endl;
    }
}
// Extract all numbers from string.
//
// Scans str for optionally signed integer or decimal literals and returns
// them, in order of appearance, converted with std::stod.
std::vector<double> ExtractNumbers(const std::string& str) {
    const std::regex numberRegex(R"([-+]?\d*\.?\d+)");
    const std::sregex_iterator noMoreMatches;

    std::vector<double> values;
    std::sregex_iterator current(str.begin(), str.end(), numberRegex);
    while (current != noMoreMatches) {
        values.push_back(std::stod(current->str()));
        ++current;
    }
    return values;
}
// Extract words longer than specified length.
//
// Returns, in order of appearance, every run of word characters (\w+) in str
// whose length is strictly greater than minLength. A negative minLength is
// treated as 0 so that every word qualifies; converting a negative value
// directly to an unsigned length would make the threshold enormous and reject
// everything.
std::vector<std::string> ExtractWordsLongerThan(const std::string& str, int minLength) {
    const std::string::size_type threshold =
        minLength > 0 ? static_cast<std::string::size_type>(minLength) : 0;

    std::vector<std::string> result;
    const std::regex wordRegex(R"(\b\w+\b)");
    const std::sregex_iterator end;
    for (std::sregex_iterator iter(str.begin(), str.end(), wordRegex); iter != end; ++iter) {
        if (static_cast<std::string::size_type>(iter->length()) > threshold) {
            result.push_back(iter->str());
        }
    }
    return result;
}
// Extract email addresses.
//
// Returns every substring of str that looks like local@domain.tld according
// to the (deliberately simple) pattern below, in order of appearance.
std::vector<std::string> ExtractEmails(const std::string& str) {
    const std::regex emailRegex(R"([\w\.-]+@[\w\.-]+\.[a-zA-Z]{2,})");
    const std::sregex_iterator noMoreMatches;

    std::vector<std::string> found;
    std::sregex_iterator current(str.begin(), str.end(), emailRegex);
    while (current != noMoreMatches) {
        found.push_back(current->str());
        ++current;
    }
    return found;
}
// Trim() is defined after this function in the file, so it is declared here.
std::string Trim(const std::string& str);

// Parse key-value pairs.
//
// Splits str into pairs on pairDelimiter, then splits each pair at the FIRST
// occurrence of kvDelimiter into a key and a value; both are trimmed of
// surrounding whitespace. Splitting at the first delimiter only means that
//   * a value may itself contain kvDelimiter ("url=a=b" -> "url" : "a=b");
//   * an empty value is kept ("flag=" -> "flag" : "").
// Pairs without kvDelimiter or with an empty key are skipped. If a key occurs
// more than once the last value wins. An empty kvDelimiter yields an empty map.
std::map<std::string, std::string> ParseKeyValuePairs(const std::string& str,
                                                      const std::string& pairDelimiter,
                                                      const std::string& kvDelimiter) {
    std::map<std::string, std::string> result;
    if (kvDelimiter.empty()) {
        return result;
    }

    const std::vector<std::string> pairs = SplitString(str, pairDelimiter);
    for (const auto& pair : pairs) {
        const std::string::size_type separator = pair.find(kvDelimiter);
        if (separator == std::string::npos) {
            continue;  // not a key/value pair
        }
        const std::string key = Trim(pair.substr(0, separator));
        if (key.empty()) {
            continue;
        }
        result[key] = Trim(pair.substr(separator + kvDelimiter.length()));
    }
    return result;
}
// Trim whitespace from string.
//
// Returns str without leading and trailing spaces, tabs, newlines and
// carriage returns. A string consisting only of those characters (or an
// empty string) yields an empty result.
std::string Trim(const std::string& str) {
    static const char* const kWhitespace = " \t\n\r";

    const std::string::size_type first = str.find_first_not_of(kWhitespace);
    if (first == std::string::npos) {
        return std::string();
    }
    const std::string::size_type last = str.find_last_not_of(kWhitespace);
    const std::string::size_type length = last - first + 1;
    return str.substr(first, length);
}
// These helpers are defined after this function in the file, so they are
// declared here to make the calls below well-formed.
int CountUnicodeCharacters(const std::string& utf8String);
std::string ToUpper(const std::string& str);
std::string ToLower(const std::string& str);

// 8. Unicode and UTF-8 string handling
//
// Demonstrates that std::string::length() counts bytes rather than
// characters, round-trips a wide string through UTF-8, counts code points in
// a UTF-8 string, and performs (ASCII-only) case conversion.
void UnicodeHandling() {
    std::cout << "\n=== Unicode and UTF-8 Handling ===" << std::endl;

    // UTF-8 string processing: length() reports bytes, not characters
    std::string utf8Text = u8"Hello 世界 🌍 C++";
    std::cout << "UTF-8 text: " << utf8Text << std::endl;
    std::cout << "UTF-8 byte length: " << utf8Text.length() << std::endl;

    // Wide string conversion.
    // NOTE: std::wstring_convert / std::codecvt_utf8 are deprecated since
    // C++17; they are kept here because the standard offers no replacement.
    std::wstring wideText = L"Hello World C++";
    std::wstring_convert<std::codecvt_utf8<wchar_t>> converter;
    std::string converted = converter.to_bytes(wideText);
    std::wstring backConverted = converter.from_bytes(converted);
    std::cout << "Wide string: " << wideText.length() << " characters" << std::endl;
    std::cout << "Converted to UTF-8: " << converted << std::endl;
    std::cout << "Converted back: " << backConverted.length() << " characters" << std::endl;

    // Count Unicode characters (simplified approach)
    std::string unicodeText = u8"Привет мир 🌍";
    int charCount = CountUnicodeCharacters(unicodeText);
    std::cout << "Unicode text: " << unicodeText << std::endl;
    std::cout << "Unicode character count: " << charCount << std::endl;

    // Case conversion: ToUpper/ToLower work byte-by-byte, so only ASCII
    // letters are converted; an ASCII sample is used for that reason.
    std::string lowerText = "hello world";
    std::string upperText = ToUpper(lowerText);
    std::string backToLower = ToLower(upperText);
    std::cout << "\nCase conversion:" << std::endl;
    std::cout << "Original: " << lowerText << std::endl;
    std::cout << "Upper: " << upperText << std::endl;
    std::cout << "Lower: " << backToLower << std::endl;
}
// Count Unicode characters (simplified UTF-8).
//
// Walks utf8String and counts one character per UTF-8 sequence. The lead byte
// determines the expected sequence length (1-4 bytes); the sequence is only
// consumed as a unit if all of its continuation bytes are present and have
// the 10xxxxxx form. A lead byte whose sequence is truncated or malformed, and
// any stray continuation/invalid byte, is counted as one character on its own
// so that it cannot swallow the valid characters that follow it.
// Overlong encodings and surrogate code points are not rejected.
int CountUnicodeCharacters(const std::string& utf8String) {
    const std::string::size_type length = utf8String.length();
    int count = 0;

    std::string::size_type i = 0;
    while (i < length) {
        const unsigned char lead = static_cast<unsigned char>(utf8String[i]);

        std::string::size_type sequenceLength = 1;  // ASCII or invalid byte
        if ((lead >> 5) == 0x6) {
            sequenceLength = 2;   // 110xxxxx
        } else if ((lead >> 4) == 0xE) {
            sequenceLength = 3;   // 1110xxxx
        } else if ((lead >> 3) == 0x1E) {
            sequenceLength = 4;   // 11110xxx
        }

        // Accept the multi-byte sequence only if it is complete and every
        // trailing byte is a continuation byte.
        bool wellFormed = i + sequenceLength <= length;
        for (std::string::size_type k = 1; wellFormed && k < sequenceLength; ++k) {
            const unsigned char next = static_cast<unsigned char>(utf8String[i + k]);
            wellFormed = (next & 0xC0) == 0x80;
        }

        i += wellFormed ? sequenceLength : 1;
        ++count;
    }
    return count;
}
// Convert to uppercase (ASCII only for simplicity).
//
// Returns a copy of str with std::toupper applied to every byte. Each byte is
// passed as unsigned char, as std::toupper requires for non-ASCII values.
std::string ToUpper(const std::string& str) {
    std::string upper(str.size(), '\0');
    std::transform(str.begin(), str.end(), upper.begin(),
                   [](unsigned char ch) { return static_cast<char>(std::toupper(ch)); });
    return upper;
}
// Convert to lowercase (ASCII only for simplicity).
//
// Returns a copy of str with std::tolower applied to every byte. Each byte is
// passed as unsigned char, as std::tolower requires for non-ASCII values.
std::string ToLower(const std::string& str) {
    std::string lower(str.size(), '\0');
    std::transform(str.begin(), str.end(), lower.begin(),
                   [](unsigned char ch) { return static_cast<char>(std::tolower(ch)); });
    return lower;
}
// Entry point: prints a banner, runs every demo section in order, and turns
// any std::exception escaping a demo into exit status 1.
int main() {
    std::cout << "=== C++ Windows String Processing ===" << std::endl;
    std::cout << "Demonstrating comprehensive string manipulation techniques\n" << std::endl;

    int exitCode = 0;
    try {
        // Sections run in the same order in which they appear in the file.
        BasicConcatenation();
        PerformanceComparison();
        AdvancedConcatenation();
        StringSplitting();
        AdvancedSplitting();
        StringJoining();
        StringParsing();
        UnicodeHandling();
        std::cout << "\nAll string processing examples completed successfully!" << std::endl;
    } catch (const std::exception& error) {
        std::cerr << "Unexpected error: " << error.what() << std::endl;
        exitCode = 1;
    }
    return exitCode;
}
💻 Сопоставление с Шаблонами Регулярных Выражений cpp
🔴 complex
⭐⭐⭐
Обработка текста и сопоставление сложных шаблонов с использованием библиотеки regex C++ с расширенными шаблонами и валидацией
⏱️ 30 min
🏷️ cpp, string processing, regex, algorithms, windows
Prerequisites:
Regular expressions, C++ regex library, Pattern syntax, Text processing
#include <iostream>
#include <string>
#include <regex>
#include <vector>
#include <map>
#include <sstream>
#include <iomanip>
#include <algorithm>
// 1. Basic regex operations
//
// Demonstrates the three core entry points of <regex>: iterating over all
// matches (sregex_iterator), matching the whole string (regex_match) and
// searching for a match anywhere in the string (regex_search).
void BasicRegexOperations() {
    std::cout << "=== Basic Regex Operations ===" << std::endl;

    // Simple pattern matching
    const std::string text = "Hello World 123";
    const std::regex pattern(R"(\w+)");  // Match word characters
    std::cout << "Text: " << text << std::endl;
    // The backslash must be escaped in an ordinary literal; "\w" is not a
    // valid escape sequence and would print as "w+".
    std::cout << "Pattern: \\w+ (word characters)" << std::endl;

    std::cout << "Matches found:" << std::endl;
    const std::sregex_iterator end;
    for (std::sregex_iterator iter(text.begin(), text.end(), pattern); iter != end; ++iter) {
        const std::smatch& match = *iter;
        std::cout << " " << match.str() << " at position " << match.position() << std::endl;
    }

    // Check if pattern matches entire string
    const std::regex exactPattern(R"(Hello\s+World\s+\d+)");
    const bool exactMatch = std::regex_match(text, exactPattern);
    std::cout << "\nExact match (Hello World 123): " << (exactMatch ? "Yes" : "No") << std::endl;

    // Search for pattern in string
    const std::regex searchPattern(R"(\d+)");
    const bool found = std::regex_search(text, searchPattern);
    std::cout << "Search for digits: " << (found ? "Found" : "Not found") << std::endl;
}
// 2. Pattern matching with groups
//
// Demonstrates capture groups by decomposing a date, an e-mail address and a
// URL into their parts. Group 0 is always the whole match; groups 1..N follow
// the order of the opening parentheses in the pattern.
void PatternMatchingWithGroups() {
    std::cout << "\n=== Pattern Matching with Groups ===" << std::endl;

    // Extract date components
    const std::string dateText = "Date: 2023-12-25, Time: 14:30:45";
    const std::regex datePattern(R"(Date: (\d{4})-(\d{2})-(\d{2}))");
    std::smatch dateMatch;
    if (std::regex_search(dateText, dateMatch, datePattern)) {
        std::cout << "Date text: " << dateText << std::endl;
        std::cout << "Full match: " << dateMatch[0].str() << std::endl;
        std::cout << "Year: " << dateMatch[1].str() << std::endl;
        std::cout << "Month: " << dateMatch[2].str() << std::endl;
        std::cout << "Day: " << dateMatch[3].str() << std::endl;
    }

    // Extract email components (sample address on a reserved example domain)
    const std::string email = "john.doe@example.com";
    const std::regex emailPattern(R"((\w+[\.\w]*\w+)@(\w+[\.\w]*\w+)\.(\w+))");
    std::smatch emailMatch;
    if (std::regex_match(email, emailMatch, emailPattern)) {
        std::cout << "\nEmail: " << email << std::endl;
        std::cout << "Username: " << emailMatch[1].str() << std::endl;
        std::cout << "Domain: " << emailMatch[2].str() << std::endl;
        // The pattern has exactly three groups; the TLD is the third one.
        std::cout << "TLD: " << emailMatch[3].str() << std::endl;
    }

    // Extract URL components
    const std::string url = "https://www.example.com/path/to/page?param1=value1&param2=value2";
    const std::regex urlPattern(R"((https?://)([^/]+)([^?]*)(\?(.*))?)");
    std::smatch urlMatch;
    if (std::regex_match(url, urlMatch, urlPattern)) {
        std::cout << "\nURL: " << url << std::endl;
        std::cout << "Protocol: " << urlMatch[1].str() << std::endl;
        std::cout << "Domain: " << urlMatch[2].str() << std::endl;
        std::cout << "Path: " << urlMatch[3].str() << std::endl;
        // Group 4 is the optional "?..." part; group 5 is the text after '?'.
        std::cout << "Query string: " << urlMatch[5].str() << std::endl;
    }
}
// 3. Text validation patterns
//
// Validates sample e-mail addresses, phone numbers and passwords with
// std::regex_match (which requires the whole input to match) and prints a
// verdict for each sample.
void TextValidation() {
    std::cout << "\n=== Text Validation Patterns ===" << std::endl;

    // Email validation (sample addresses; the last three are intentionally bad)
    const std::vector<std::string> emails = {
        "user@example.com",
        "user.name+tag@domain.co.uk",
        "user123@test-domain.org",
        "invalid.email@",
        "@missing-user.com",
        "user@.com"
    };
    // The dot before the top-level domain is escaped so it matches a literal
    // '.' rather than any character.
    const std::regex emailPattern(
        R"(^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$)"
    );
    std::cout << "Email validation:" << std::endl;
    for (const auto& email : emails) {
        const bool isValid = std::regex_match(email, emailPattern);
        std::cout << email << " - " << (isValid ? "Valid" : "Invalid") << std::endl;
    }

    // Phone number validation
    const std::vector<std::string> phones = {
        "123-456-7890",
        "(123) 456-7890",
        "+1 (123) 456-7890",
        "123.456.7890",
        "1234567890",
        "invalid-phone"
    };
    // '-' is placed first inside each bracket expression so it is a literal
    // dash; written as "[.-\s]" it would be parsed as a range ending in a
    // character class, which is rejected as an invalid range.
    const std::regex phonePattern(
        R"(^\+?1?[-.\s]?\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}$)"
    );
    std::cout << "\nPhone number validation:" << std::endl;
    for (const auto& phone : phones) {
        const bool isValid = std::regex_match(phone, phonePattern);
        std::cout << phone << " - " << (isValid ? "Valid" : "Invalid") << std::endl;
    }

    // Password strength validation
    const std::vector<std::string> passwords = {
        "password",                                     // Rejected: lowercase only
        "Password123",                                  // Rejected: no special character
        "P@ssw0rd!123",                                 // Strong: mixed case, digits, special chars
        "Short1!",                                      // Rejected: shorter than 8 characters
        "VeryLongPasswordWithNoNumbersOrSpecialChars",  // Rejected: no digit, no special character
        "Str0ng#P@ss"                                   // Rejected: '#' is not in the allowed character set
    };
    // Four lookaheads require a lowercase letter, an uppercase letter, a digit
    // and one of @$!%*?&; the final class restricts the allowed alphabet and
    // enforces a minimum length of 8.
    const std::regex strongPasswordPattern(
        R"(^(?=.*[a-z])(?=.*[A-Z])(?=.*\d)(?=.*[@$!%*?&])[A-Za-z\d@$!%*?&]{8,}$)"
    );
    std::cout << "\nPassword strength validation:" << std::endl;
    for (const auto& password : passwords) {
        const bool isStrong = std::regex_match(password, strongPasswordPattern);
        std::cout << " '" << password << "' - " << (isStrong ? "Strong" : "Weak/Invalid") << std::endl;
    }
}
// 4. Text extraction and replacement
//
// Extracts e-mail addresses and phone numbers from a block of text, then
// demonstrates std::regex_replace with a fixed replacement, with capture-group
// references ($1, $2, ...) and with a case-insensitive pattern.
void TextExtractionAndReplacement() {
    std::cout << "\n=== Text Extraction and Replacement ===" << std::endl;
    const std::string content = R"(
Contact Information:
Email: info@example.com
Phone: (555) 123-4567
Address: 123 Main St, City, State 12345
Website: https://www.example.com
Additional contact: support@example.com
)";
    std::cout << "Original content:" << content << std::endl;

    const std::sregex_iterator noMoreMatches;

    // Extract all emails
    const std::regex emailPattern(R"([\w\.-]+@[\w\.-]+\.[a-zA-Z]{2,})");
    std::cout << "\nExtracted emails:" << std::endl;
    for (std::sregex_iterator it(content.begin(), content.end(), emailPattern);
         it != noMoreMatches; ++it) {
        std::cout << " " << it->str() << std::endl;
    }

    // Extract phone numbers. Area code, prefix and line number are captured
    // as groups 1-3 so the replacement further down can rearrange them.
    const std::regex phonePattern(R"(\((\d{3})\)\s*(\d{3})[-\s]?(\d{4}))");
    std::cout << "\nExtracted phone numbers:" << std::endl;
    for (std::sregex_iterator it(content.begin(), content.end(), phonePattern);
         it != noMoreMatches; ++it) {
        std::cout << " " << it->str() << std::endl;
    }

    // Replace emails with placeholder
    const std::string replacedContent = std::regex_replace(
        content, emailPattern, "[EMAIL PROTECTED]"
    );
    std::cout << "\nContent after email replacement:" << std::endl;
    std::cout << replacedContent << std::endl;

    // Replace phone numbers with a dash-separated version built from the
    // three capture groups: "(555) 123-4567" becomes "555-123-4567".
    const std::string formattedContent = std::regex_replace(
        content,
        phonePattern,
        "$1-$2-$3"
    );
    std::cout << "\nContent with formatted phones:" << std::endl;
    std::cout << formattedContent << std::endl;

    // Case-insensitive replacement
    const std::string caseInsensitiveText = "Hello WORLD, hello World, HELLO world!";
    const std::regex casePattern(R"(hello)", std::regex_constants::icase);
    const std::string caseReplaced = std::regex_replace(
        caseInsensitiveText, casePattern, "Hi"
    );
    std::cout << "\nCase-insensitive replacement:" << std::endl;
    std::cout << "Original: " << caseInsensitiveText << std::endl;
    std::cout << "Replaced: " << caseReplaced << std::endl;
}
#include <cctype>  // std::isalnum / std::tolower; not in this sample's include list

// Helper function to check palindrome (simplified).
//
// Returns true if str reads the same forwards and backwards once every
// non-alphanumeric character is removed and letters are lower-cased. A string
// with no alphanumeric characters at all counts as a palindrome.
// Defined ahead of AdvancedPatternMatching() so it is declared before use.
bool IsPalindrome(const std::string& str) {
    std::string cleanStr;
    cleanStr.reserve(str.size());
    for (const char c : str) {
        // <cctype> functions require a value representable as unsigned char.
        const unsigned char byte = static_cast<unsigned char>(c);
        if (std::isalnum(byte)) {
            cleanStr += static_cast<char>(std::tolower(byte));
        }
    }
    // Compare the first half against the reversed second half.
    return std::equal(cleanStr.begin(), cleanStr.begin() + cleanStr.size() / 2, cleanStr.rbegin());
}

// 5. Advanced pattern matching
//
// Demonstrates a one-level parentheses pattern, palindrome detection, quoted
// text extraction, back-references for repeated substrings, and matching an
// opening tag against its closing tag.
void AdvancedPatternMatching() {
    std::cout << "\n=== Advanced Pattern Matching ===" << std::endl;

    // Match nested parentheses (simplified): the pattern only accepts
    // parentheses that are NOT nested, so the sample below does not match.
    const std::string nestedText = "(a(b(c)d)e)f(g)h";
    const std::regex nestedPattern(R"(([^()]*\([^()]*\))*[^()]*)");
    std::cout << "Nested parentheses check:" << std::endl;
    std::cout << "String: " << nestedText << std::endl;
    std::cout << "Pattern matches: " << (std::regex_match(nestedText, nestedPattern) ? "Yes" : "No") << std::endl;

    // Find palindromes (simplified for demonstration)
    const std::vector<std::string> words = {
        "level", "hello", "radar", "world", "madam", "c++", "refer"
    };
    std::cout << "\nPalindrome detection:" << std::endl;
    for (const auto& word : words) {
        const bool isPalindrome = IsPalindrome(word);
        std::cout << word << " - " << (isPalindrome ? "Palindrome" : "Not palindrome") << std::endl;
    }

    // Extract quoted text. The pattern contains the sequence )" which would
    // terminate a plain R"(...)" literal early, hence the custom delimiter.
    const std::string quotedText = R"("Hello" she said, "How are you?" "I'm fine" he replied.)";
    const std::regex quotePattern(R"re("([^"]*)")re");
    const std::sregex_iterator noMoreMatches;
    std::cout << "\nExtracted quotes:" << std::endl;
    for (std::sregex_iterator it(quotedText.begin(), quotedText.end(), quotePattern);
         it != noMoreMatches; ++it) {
        // Group 1 contains the content inside the quotes
        std::cout << " \"" << (*it)[1].str() << "\"" << std::endl;
    }

    // Find repeated patterns: \1 refers back to what group 1 captured
    const std::string repeatText = "abcabcabc xyzxyz abcabcabcabc";
    const std::regex repeatPattern(R"((\w+)\1+)");
    std::cout << "\nRepeated patterns in: " << repeatText << std::endl;
    for (std::sregex_iterator it(repeatText.begin(), repeatText.end(), repeatPattern);
         it != noMoreMatches; ++it) {
        const std::smatch& match = *it;
        std::cout << " Found repeat of: '" << match[1].str() << "' -> '" << match[0].str() << "'" << std::endl;
    }

    // Match balanced tags (XML/HTML like): the closing tag must repeat the
    // name captured by group 1, the only group in the pattern.
    const std::string xmlText = "<div><p>Hello</p><span>World</span></div>";
    const std::regex tagPattern(R"(<(\w+)>.*?</\1>)");
    std::cout << "\nXML-like tag matching:" << std::endl;
    std::cout << "String: " << xmlText << std::endl;
    std::cout << "Has balanced tags: " << (std::regex_match(xmlText, tagPattern) ? "Yes" : "No") << std::endl;
}
#include <chrono>  // std::chrono timing; not in this sample's include list

// 6. Performance comparison
//
// Times how long it takes to enumerate all matches of several patterns in a
// fixed sentence, then compares two ways of walking over all matches:
// std::sregex_iterator versus a manual std::regex_search loop.
//
// iterations: how many times each measurement repeats its work (defaults to
// 10000). All timings are printed in microseconds.
void RegexPerformanceComparison(int iterations = 10000) {
    std::cout << "\n=== Regex Performance Comparison ===" << std::endl;
    using Clock = std::chrono::high_resolution_clock;
    const auto elapsedMicros = [](Clock::time_point from, Clock::time_point to) {
        return std::chrono::duration_cast<std::chrono::microseconds>(to - from).count();
    };

    const std::string testText = "This is a test string with multiple words and some numbers like 123 and 4567.";
    const std::sregex_iterator noMoreMatches;

    // Test different regex approaches
    struct NamedPattern {
        std::string name;
        std::regex pattern;
    };
    const std::vector<NamedPattern> patterns = {
        { "Simple word", std::regex(R"(\w+)") },
        { "Number pattern", std::regex(R"(\d+)") },
        { "Complex pattern", std::regex(R"([A-Z][a-z]+\s+[a-z]+)") },
        { "Email pattern", std::regex(R"([\w\.-]+@[\w\.-]+\.[a-zA-Z]{2,})") }
    };
    for (const auto& entry : patterns) {
        const auto start = Clock::now();
        int matches = 0;
        for (int i = 0; i < iterations; i++) {
            for (std::sregex_iterator iter(testText.begin(), testText.end(), entry.pattern);
                 iter != noMoreMatches; ++iter) {
                matches++;
            }
        }
        const auto stop = Clock::now();
        std::cout << entry.name << ": " << elapsedMicros(start, stop) << " μs ("
                  << matches << " matches found)" << std::endl;
    }

    // Compare regex_iterator vs a manual regex_search loop
    const std::regex simplePattern(R"([^\s]+)");
    const std::string searchText = "one two three four five six seven eight nine ten";

    // Using regex_iterator (most flexible)
    const auto start1 = Clock::now();
    for (int i = 0; i < iterations; i++) {
        int count = 0;
        for (std::sregex_iterator iter(searchText.begin(), searchText.end(), simplePattern);
             iter != noMoreMatches; ++iter) {
            count++;
        }
        (void)count;
    }
    const auto end1 = Clock::now();

    // Using manual search with regex_search. The search window is advanced
    // with iterators instead of copying the remaining text into a new string
    // after every match, so the timing reflects regex work rather than copies.
    const auto start2 = Clock::now();
    for (int i = 0; i < iterations; i++) {
        std::smatch match;
        std::string::const_iterator cursor = searchText.cbegin();
        int count = 0;
        while (std::regex_search(cursor, searchText.cend(), match, simplePattern)) {
            count++;
            cursor = match.suffix().first;
        }
        (void)count;
    }
    const auto end2 = Clock::now();

    std::cout << "\nMethod comparison for word extraction:" << std::endl;
    std::cout << "regex_iterator: " << elapsedMicros(start1, end1) << " μs" << std::endl;
    std::cout << "regex_search loop: " << elapsedMicros(start2, end2) << " μs" << std::endl;
}
// 7. Practical text processing examples
// Runs three small demos and prints their results to std::cout:
//   1. splitting log lines into timestamp / level / message,
//   2. extracting line and block comments from a code snippet,
//   3. normalizing the spacing of comma-separated records.
void PracticalTextProcessing() {
    std::cout << "\n=== Practical Text Processing Examples ===" << std::endl;

    // Log file parsing
    const std::vector<std::string> logLines = {
        "2023-12-25 10:30:45 [INFO] Application started",
        "2023-12-25 10:31:02 [ERROR] Connection failed to database",
        "2023-12-25 10:31:15 [WARNING] Retrying connection",
        "2023-12-25 10:31:30 [INFO] Connection established",
        "2023-12-25 10:32:00 [DEBUG] Processing request #12345"
    };
    // Capture groups: 1 = timestamp, 2 = level, 3 = message.
    const std::regex logPattern(R"((\d{4}-\d{2}-\d{2}\s+\d{2}:\d{2}:\d{2})\s+\[(\w+)\]\s+(.*))");
    std::map<std::string, std::vector<std::string>> logEntries;  // messages grouped by level
    std::cout << "Parsing log file entries:" << std::endl;
    for (const auto& line : logLines) {
        std::smatch match;
        if (std::regex_match(line, match, logPattern)) {
            const std::string timestamp = match[1].str();
            const std::string level = match[2].str();
            const std::string message = match[3].str();
            logEntries[level].push_back(message);
            std::cout << level << ": " << message << " (" << timestamp << ")" << std::endl;
        }
    }

    // Code comment extraction
    const std::string codeWithComments = R"(
// This is a single line comment
int x = 5; /* This is a block comment */
/*
* Multi-line comment
* spanning multiple lines
*/
int y = 10; // Another comment
)";
    // std::regex has no "dot matches newline" option and '.' never matches a
    // line terminator, so block comments use [\s\S] to span lines and line
    // comments simply run up to the next '\n' (no reliance on '$').
    const std::regex commentPattern(R"(//[^\n]*|/\*[\s\S]*?\*/)");
    // A line break inside a block comment plus the decorative leading '*'.
    const std::regex continuationPattern(R"(\s*\n\s*\*?\s*)");
    std::vector<std::string> comments;
    const std::sregex_iterator commentEnd;
    std::cout << "\nExtracted code comments:" << std::endl;
    for (std::sregex_iterator commentIter(codeWithComments.begin(), codeWithComments.end(), commentPattern);
         commentIter != commentEnd; ++commentIter) {
        std::string comment = commentIter->str();
        if (comment.compare(0, 2, "//") == 0) {
            comment.erase(0, 2);
        } else {
            // Strip the delimiters BEFORE joining lines; joining first would
            // swallow the '*' of the closing "*/" and leave a stray '/'.
            comment = comment.substr(2, comment.size() - 4);
            comment = std::regex_replace(comment, continuationPattern, " ");
        }
        // Trim whitespace
        comment.erase(0, comment.find_first_not_of(" \t"));
        comment.erase(comment.find_last_not_of(" \t") + 1);
        if (!comment.empty()) {
            comments.push_back(comment);
            std::cout << " " << comment << std::endl;
        }
    }

    // Data cleaning
    const std::string dirtyData = R"(
John Doe, 25, New York
Jane Smith,30,Los Angeles
Bob Johnson, 35 , Chicago
Alice Williams,28,Boston
)";
    std::cout << "\nData cleaning example:" << std::endl;
    std::cout << "Original: " << dirtyData << std::endl;

    // Clean and format data. The patterns are compiled once, outside the loop.
    const std::regex edgeWhitespace(R"(^\s+|\s+$)");
    // Matches every comma with any surrounding whitespace (including none),
    // so "a,b", "a ,b" and "a , b" all become "a, b".
    const std::regex commaSpacing(R"(\s*,\s*)");
    std::vector<std::string> cleanedLines;
    std::istringstream iss(dirtyData);
    std::string line;
    while (std::getline(iss, line)) {
        // Remove leading/trailing whitespace
        line = std::regex_replace(line, edgeWhitespace, "");
        // Normalize every field separator to ", "
        line = std::regex_replace(line, commaSpacing, ", ");
        if (!line.empty()) {
            cleanedLines.push_back(line);
        }
    }
    std::cout << "Cleaned data:" << std::endl;
    for (const auto& cleanLine : cleanedLines) {
        std::cout << " " << cleanLine << std::endl;
    }
}
// Entry point of the regular-expression demo: prints a banner, runs every
// example in order and reports any std::exception on std::cerr.
// Returns 0 when all examples finish, 1 if one of them throws.
int main() {
    int exitCode = 0;
    std::cout << "=== C++ Windows String Processing - Regular Expressions ===" << std::endl;
    std::cout << "Demonstrating comprehensive regex pattern matching and text processing\n" << std::endl;
    try {
        // Run all regex examples
        BasicRegexOperations();
        PatternMatchingWithGroups();
        TextValidation();
        TextExtractionAndReplacement();
        AdvancedPatternMatching();
        RegexPerformanceComparison();
        PracticalTextProcessing();
        std::cout << "\nAll regex pattern matching examples completed successfully!" << std::endl;
    } catch (const std::exception& error) {
        std::cerr << "Unexpected error: " << error.what() << std::endl;
        exitCode = 1;
    }
    return exitCode;
}
💻 Замена и Преобразование Строк cpp
🔴 complex
⭐⭐⭐⭐
Продвинутые техники замены строк включая массовые операции, условную замену и конвейеры преобразования текста
⏱️ 35 min
🏷️ cpp, string processing, algorithms, windows
Prerequisites:
Advanced string manipulation, Regular expressions, Performance optimization, Text processing algorithms
#include <iostream>
#include <string>
#include <vector>
#include <map>
#include <regex>
#include <sstream>
#include <algorithm>
#include <cctype>
#include <unordered_map>
#include <chrono>
// 1. Basic string replacement
// Demonstrates three find/replace techniques on one sentence and prints each
// result to std::cout: first occurrence only, all occurrences
// (case-sensitive), and all occurrences ignoring case.
void BasicStringReplacement() {
    std::cout << "=== Basic String Replacement ===" << std::endl;
    const std::string text = "The quick brown fox jumps over the lazy dog.";
    std::cout << "Original: " << text << std::endl;

    // Simple replacement: only the first occurrence is touched.
    std::string replaced1 = text;
    const size_t pos = replaced1.find("fox");
    if (pos != std::string::npos) {
        replaced1.replace(pos, 3, "cat");
        std::cout << "Replace 'fox' with 'cat': " << replaced1 << std::endl;
    }

    // Replace all occurrences (case-sensitive, so the leading "The" stays).
    std::string replaced2 = text;
    const std::string from = "the";
    const std::string to = "a";
    size_t startPos = 0;
    while ((startPos = replaced2.find(from, startPos)) != std::string::npos) {
        replaced2.replace(startPos, from.length(), to);
        startPos += to.length(); // Move past the replacement
    }
    std::cout << "Replace all 'the' with 'a': " << replaced2 << std::endl;

    // Case-insensitive replacement (manual implementation).
    // The search runs on a lowercase shadow copy; BOTH the shadow copy and the
    // search key must be lowercased, otherwise "THE" can never be found.
    std::string replaced3 = text;
    std::string target = "THE";
    const std::string replacement = "A";
    // <cctype> functions need an unsigned char value; plain char may be negative.
    const auto toLower = [](unsigned char ch) { return static_cast<char>(std::tolower(ch)); };
    std::string lowerText = replaced3;
    std::transform(lowerText.begin(), lowerText.end(), lowerText.begin(), toLower);
    std::transform(target.begin(), target.end(), target.begin(), toLower);
    size_t currentPos = 0;
    while ((currentPos = lowerText.find(target, currentPos)) != std::string::npos) {
        // Apply the identical edit to both strings so their offsets stay aligned.
        replaced3.replace(currentPos, target.length(), replacement);
        lowerText.replace(currentPos, target.length(), replacement);
        currentPos += replacement.length();
    }
    std::cout << "Case-insensitive 'the' -> 'A': " << replaced3 << std::endl;
}
// 2. Using regex for advanced replacement
// Prints a series of std::regex_replace demos: masking e-mail addresses,
// reformatting US-style phone numbers, and cleaning up a pricing string.
void RegexReplacement() {
    std::cout << "\n=== Regex Replacement ===" << std::endl;
    // The sample must contain a real-looking address, otherwise the e-mail
    // pattern below has nothing to match.
    const std::string text = "Contact: john.doe@example.com, Phone: (555) 123-4567, Alt: (555) 987-6543";
    std::cout << "Original: " << text << std::endl;

    // Replace all email addresses with placeholder
    const std::regex emailPattern(R"([\w\.-]+@[\w\.-]+\.[a-zA-Z]{2,})");
    const std::string emailReplaced = std::regex_replace(text, emailPattern, "[EMAIL PROTECTED]");
    std::cout << "Emails replaced: " << emailReplaced << std::endl;

    // Replace phone numbers with standardized format.
    // All three digit groups are captured so the area code survives:
    // "(555) 123-4567" -> "555-123-4567".
    const std::regex phonePattern(R"(\((\d{3})\)\s*(\d{3})[-\s]?(\d{4}))");
    const std::string phoneReplaced = std::regex_replace(text, phonePattern, "$1-$2-$3");
    std::cout << "Phone numbers standardized: " << phoneReplaced << std::endl;

    // Multiple replacements in sequence
    const std::string multiText = "Price: $19.99, Discount: 15%, Tax: 8.5%, Total: $22.90";
    std::cout << "\nOriginal pricing: " << multiText << std::endl;

    // Remove currency symbols
    const std::string noCurrency = std::regex_replace(multiText, std::regex(R"(\$)"), "");
    std::cout << "Currency removed: " << noCurrency << std::endl;

    // Format percentages consistently.
    // No trailing \b: '%' and the following ',' are both non-word characters,
    // so a word boundary can never occur there.
    const std::regex percentPattern(R"(\b(\d+(?:\.\d+)?)%)");
    const std::string formattedPercents = std::regex_replace(noCurrency, percentPattern, "$1 percent");
    std::cout << "Formatted percentages: " << formattedPercents << std::endl;

    // Append ".00" to whole numbers only. std::regex_replace cannot make the
    // suffix conditional, so the matches are walked by hand: group 2 is the
    // optional fractional part, and numbers that have one are copied as-is.
    const std::regex numberPattern(R"(\b(\d+)(\.\d+)?\b)");
    std::string formattedNumbers;
    auto copiedUpTo = formattedPercents.cbegin();
    const std::sregex_iterator noMoreMatches;
    for (std::sregex_iterator it(formattedPercents.cbegin(), formattedPercents.cend(), numberPattern);
         it != noMoreMatches; ++it) {
        const std::smatch& match = *it;
        formattedNumbers.append(copiedUpTo, match[0].second);
        if (!match[2].matched) {
            formattedNumbers += ".00";
        }
        copiedUpTo = match[0].second;
    }
    formattedNumbers.append(copiedUpTo, formattedPercents.cend());
    std::cout << "Numbers with .00: " << formattedNumbers << std::endl;
}
// Helpers defined further down in this file; declared here so the calls in
// ConditionalReplacement compile.
std::vector<std::string> SplitWords(const std::string& text);
std::string JoinWords(const std::vector<std::string>& words);

// 3. Conditional replacement
// Prints three demos in which the replacement depends on a property of each
// word: its length, its first letter, or whether it contains "bread".
void ConditionalReplacement() {
    std::cout << "\n=== Conditional Replacement ===" << std::endl;
    const std::vector<std::string> words = {"cat", "dog", "mouse", "elephant", "ant", "giraffe"};
    std::cout << "Original words: ";
    for (const auto& word : words) {
        std::cout << word << " ";
    }
    std::cout << std::endl;

    // Replace based on word length
    std::vector<std::string> lengthReplaced;
    lengthReplaced.reserve(words.size());
    for (const auto& word : words) {
        if (word.length() <= 3) {
            lengthReplaced.push_back("SMALL");
        } else if (word.length() <= 5) {
            lengthReplaced.push_back("MEDIUM");
        } else {
            lengthReplaced.push_back("LARGE");
        }
    }
    std::cout << "Length-based replacement: ";
    for (const auto& word : lengthReplaced) {
        std::cout << word << " ";
    }
    std::cout << std::endl;

    // Replace based on starting letter
    std::vector<std::string> letterReplaced;
    letterReplaced.reserve(words.size());
    for (const auto& word : words) {
        if (!word.empty()) {
            // std::tolower needs an unsigned char value; plain char may be negative.
            const char first = static_cast<char>(std::tolower(static_cast<unsigned char>(word[0])));
            if (first >= 'a' && first <= 'm') {
                letterReplaced.push_back("FIRST_HALF");
            } else {
                letterReplaced.push_back("SECOND_HALF");
            }
        }
    }
    std::cout << "Alphabet-based replacement: ";
    for (const auto& word : letterReplaced) {
        std::cout << word << " ";
    }
    std::cout << std::endl;

    // Replace words containing specific patterns
    const std::string text = "The bakery sells bread, cakes, and cookies. The bread is fresh.";
    std::cout << "\nOriginal: " << text << std::endl;
    // Words containing 'bread' -> replace with '[BAKERY ITEM]'.
    // SplitWords strips punctuation, so the rejoined sentence has none.
    std::vector<std::string> wordsInText = SplitWords(text);
    for (auto& word : wordsInText) {
        if (word.find("bread") != std::string::npos) {
            word = "[BAKERY ITEM]";
        }
    }
    const std::string conditionalText = JoinWords(wordsInText);
    std::cout << "Conditional replacement: " << conditionalText << std::endl;
}
// Helper function to split text into words.
// Splits on whitespace, strips every punctuation character from each token
// and drops tokens that end up empty (e.g. "--" or "...").
std::vector<std::string> SplitWords(const std::string& text) {
    // <cctype> classifiers require a value representable as unsigned char;
    // taking the parameter as unsigned char keeps non-ASCII bytes well-defined.
    const auto isPunctuation = [](unsigned char ch) { return std::ispunct(ch) != 0; };

    std::vector<std::string> words;
    std::istringstream iss(text);
    std::string word;
    while (iss >> word) {
        // Remove punctuation
        word.erase(std::remove_if(word.begin(), word.end(), isPunctuation), word.end());
        if (!word.empty()) {
            words.push_back(word);
        }
    }
    return words;
}
// Helper function to join words.
// Concatenates the words in order with a single space between neighbours;
// an empty vector yields an empty string.
std::string JoinWords(const std::vector<std::string>& words) {
    std::string joined;
    const char* separator = "";
    for (const std::string& piece : words) {
        joined += separator;
        joined += piece;
        separator = " ";  // every piece after the first is preceded by a space
    }
    return joined;
}
// Helpers defined further down in this file; declared here so the calls in
// MultipleReplacements compile.
std::string ApplyMultipleReplacements(const std::string& text,
                                      const std::unordered_map<std::string, std::string>& replacements);
std::string ApplyCaseInsensitiveReplacements(const std::string& text,
                                             const std::unordered_map<std::string, std::string>& replacements);
std::string ApplyPatternReplacements(const std::string& text,
                                     const std::unordered_map<std::string, std::string>& replacements);

// 4. Multiple simultaneous replacements
// Applies three replacement tables (literal, case-insensitive, regex) to one
// sentence and prints each result.
// NOTE: the tables are unordered_maps, so the order in which their entries are
// applied is unspecified; entries that overlap (e.g. "today" and "DAY") can
// give different results on different standard libraries.
void MultipleReplacements() {
    std::cout << "\n=== Multiple Simultaneous Replacements ===" << std::endl;
    const std::string text = "Hello World! How are you today? I hope you're doing well. Have a great day!";
    std::cout << "Original: " << text << std::endl;

    // Define replacement map
    const std::unordered_map<std::string, std::string> replacements = {
        {"Hello", "Hi"},
        {"World", "Everyone"},
        {"great", "wonderful"},
        {"well", "fine"},
        {"hope", "wish"}
    };
    // Apply all replacements
    const std::string multiReplaced = ApplyMultipleReplacements(text, replacements);
    std::cout << "Multiple replacements: " << multiReplaced << std::endl;

    // Case-insensitive multiple replacements
    const std::unordered_map<std::string, std::string> caseInsensitiveReplacements = {
        {"HELLO", "Hi"},
        {"world", "Everyone"},
        {"YOU", "you"},
        {"today", "now"},
        {"DAY", "evening"}
    };
    const std::string caseInsensitiveResult = ApplyCaseInsensitiveReplacements(text, caseInsensitiveReplacements);
    std::cout << "Case-insensitive replacements: " << caseInsensitiveResult << std::endl;

    // Pattern-based replacements
    const std::unordered_map<std::string, std::string> patternReplacements = {
        {R"(\b\w{4}\b)", "FOUR"},        // 4-letter words
        {R"(\b\w+\!)", "EXCLAMATED!"},   // Words ending with !
        {R"([?])", "?"}                  // Just question marks (no change, demonstration)
    };
    const std::string patternResult = ApplyPatternReplacements(text, patternReplacements);
    std::cout << "Pattern-based replacements: " << patternResult << std::endl;
}
// Apply multiple word replacements.
// For every (from, to) entry, replaces each non-overlapping occurrence of
// `from` in the text with `to` and returns the edited copy. Matching is
// case-sensitive and on substrings, not whole words. Entries are applied one
// after another in the map's (unspecified) iteration order, so a later entry
// can match text produced by an earlier one.
std::string ApplyMultipleReplacements(const std::string& text,
                                      const std::unordered_map<std::string, std::string>& replacements) {
    std::string result = text;
    for (const auto& [from, to] : replacements) {
        // An empty key matches at every position and would insert `to`
        // forever; there is nothing meaningful to replace, so skip it.
        if (from.empty()) {
            continue;
        }
        size_t pos = 0;
        while ((pos = result.find(from, pos)) != std::string::npos) {
            result.replace(pos, from.length(), to);
            pos += to.length();  // resume after the inserted text
        }
    }
    return result;
}
// Apply case-insensitive replacements.
// Like a plain find/replace over every (from, to) entry, except that `from`
// matches regardless of letter case. `to` is inserted exactly as given.
// Entries are applied in the map's (unspecified) iteration order.
std::string ApplyCaseInsensitiveReplacements(const std::string& text,
                                             const std::unordered_map<std::string, std::string>& replacements) {
    // <cctype> functions need an unsigned char value; plain char may be negative.
    const auto toLower = [](unsigned char ch) { return static_cast<char>(std::tolower(ch)); };

    std::string result = text;
    for (const auto& [from, to] : replacements) {
        // An empty key matches everywhere and would never terminate.
        if (from.empty()) {
            continue;
        }
        // Search on lowercase copies; edit `result` at the same offsets.
        std::string lowerResult = result;
        std::string lowerFrom = from;
        std::transform(lowerResult.begin(), lowerResult.end(), lowerResult.begin(), toLower);
        std::transform(lowerFrom.begin(), lowerFrom.end(), lowerFrom.begin(), toLower);
        size_t pos = 0;
        while ((pos = lowerResult.find(lowerFrom, pos)) != std::string::npos) {
            // Mirror the edit in the shadow copy so both strings stay aligned.
            result.replace(pos, from.length(), to);
            lowerResult.replace(pos, from.length(), to);
            pos += to.length();
        }
    }
    return result;
}
// Apply regex pattern replacements.
// Treats every key as a regular expression and every value as its
// std::regex_replace format string, applying the entries one after another
// to a copy of the text. A key that is not a valid regex is reported on
// std::cout and skipped; the remaining entries are still applied.
std::string ApplyPatternReplacements(const std::string& text,
                                     const std::unordered_map<std::string, std::string>& replacements) {
    std::string output = text;
    for (const auto& entry : replacements) {
        const std::string& expression = entry.first;
        const std::string& substitute = entry.second;
        try {
            const std::regex compiled(expression);
            output = std::regex_replace(output, compiled, substitute);
        } catch (const std::regex_error& failure) {
            std::cout << "Invalid regex pattern '" << expression << "': " << failure.what() << std::endl;
        }
    }
    return output;
}
// Pipelines defined further down in this file; declared here so the calls in
// TransformationPipelines compile.
std::string TextNormalizationPipeline(const std::string& text);
std::string DataExtractionPipeline(const std::string& text);
std::string AnonymizationPipeline(const std::string& text);
std::string FormattingPipeline(const std::string& text);

// 5. Text transformation pipelines
// Feeds one multi-line sample through each of the four pipelines and prints
// the original followed by every result.
void TransformationPipelines() {
    std::cout << "\n=== Text Transformation Pipelines ===" << std::endl;
    // The sample needs a real-looking e-mail address so the e-mail steps of
    // the extraction and anonymization pipelines have something to match.
    const std::string rawText = R"(
THE QUICK BROWN FOX JUMPS OVER THE LAZY DOG!!!
This is a test... With punctuation??? And numbers 12345.
Email: john.doe@example.com, Phone: (555) 123-4567
)";
    std::cout << "Original text:" << std::endl;
    std::cout << rawText << std::endl;

    // Pipeline 1: Normalization
    const std::string normalized = TextNormalizationPipeline(rawText);
    std::cout << "\n=== Normalization Pipeline ===" << std::endl;
    std::cout << normalized << std::endl;

    // Pipeline 2: Data extraction
    const std::string extracted = DataExtractionPipeline(rawText);
    std::cout << "\n=== Data Extraction Pipeline ===" << std::endl;
    std::cout << extracted << std::endl;

    // Pipeline 3: Anonymization
    const std::string anonymized = AnonymizationPipeline(rawText);
    std::cout << "\n=== Anonymization Pipeline ===" << std::endl;
    std::cout << anonymized << std::endl;

    // Pipeline 4: Formatting
    const std::string formatted = FormattingPipeline(rawText);
    std::cout << "\n=== Formatting Pipeline ===" << std::endl;
    std::cout << formatted << std::endl;
}
// Pipeline 1: Text normalization
// Returns the text lowercased, with everything except a-z, 0-9 and whitespace
// removed, runs of whitespace collapsed to one space, and no leading or
// trailing whitespace.
std::string TextNormalizationPipeline(const std::string& text) {
    std::string result = text;

    // Step 1: Convert to lowercase (std::tolower needs an unsigned char value).
    std::transform(result.begin(), result.end(), result.begin(),
                   [](unsigned char ch) { return static_cast<char>(std::tolower(ch)); });

    // Step 2: Remove punctuation. This runs BEFORE whitespace is collapsed:
    // deleting a free-standing symbol ("a - b") leaves two adjacent spaces
    // that the next step must still be able to merge.
    result = std::regex_replace(result, std::regex(R"([^a-z0-9\s])"), "");

    // Step 3: Remove extra whitespace
    result = std::regex_replace(result, std::regex(R"(\s+)"), " ");

    // Step 4: Trim leading/trailing whitespace. When the string is all
    // whitespace, find_first_not_of returns npos and erase(0, npos) clears it.
    result.erase(0, result.find_first_not_of(" \t\n\r"));
    result.erase(result.find_last_not_of(" \t\n\r") + 1);
    return result;
}
// Pipeline 2: Data extraction
// Wraps every e-mail address, phone number of the form "(ddd) ddd-dddd" and
// free-standing integer in a tag ("[EMAIL:...]", "[PHONE:...]", "[NUM:...]")
// and returns the whole text uppercased.
std::string DataExtractionPipeline(const std::string& text) {
    // All three kinds are matched in ONE pass. Tagging them in separate passes
    // would let the number pass re-tag digits inside an already tagged phone
    // number or e-mail address. Alternatives are tried in order, so at any
    // position e-mail wins over phone, and phone over number.
    // Groups: 1 = e-mail, 2 = phone, 3 = number.
    static const std::regex tokenPattern(
        R"(([\w\.-]+@[\w\.-]+\.[a-zA-Z]{2,})|(\(\d{3}\)\s*\d{3}[-\s]?\d{4})|(\b\d+\b))");

    std::string result;
    result.reserve(text.size());
    auto copiedUpTo = text.cbegin();
    const std::sregex_iterator noMoreMatches;
    for (std::sregex_iterator it(text.cbegin(), text.cend(), tokenPattern); it != noMoreMatches; ++it) {
        const std::smatch& match = *it;
        // Copy the untouched text between the previous match and this one.
        result.append(copiedUpTo, match[0].first);
        if (match[1].matched) {
            result += "[EMAIL:";
        } else if (match[2].matched) {
            result += "[PHONE:";
        } else {
            result += "[NUM:";
        }
        result += match.str();
        result += ']';
        copiedUpTo = match[0].second;
    }
    result.append(copiedUpTo, text.cend());

    // Final step: Convert to uppercase for visibility
    // (std::toupper needs an unsigned char value).
    std::transform(result.begin(), result.end(), result.begin(),
                   [](unsigned char ch) { return static_cast<char>(std::toupper(ch)); });
    return result;
}
// Pipeline 3: Anonymization
// Runs four masking rules over the text, strictly in the order listed:
// e-mail addresses, "(ddd) ddd-dddd" phone numbers, capitalized words
// (a simplified stand-in for proper nouns) and free-standing integers.
std::string AnonymizationPipeline(const std::string& text) {
    struct MaskRule {
        const char* expression;  // what to look for
        const char* mask;        // what to put in its place
    };
    const MaskRule rules[] = {
        // Step 1: Replace emails with generic placeholder
        {R"([\w\.-]+@[\w\.-]+\.[a-zA-Z]{2,})", "[email protected]"},
        // Step 2: Replace phone numbers
        {R"(\(\d{3}\)\s*\d{3}[-\s]?\d{4})", "(XXX) XXX-XXXX"},
        // Step 3: Replace proper nouns (simplified - capitalized words)
        {R"(\b[A-Z][a-z]+\b)", "[PROPER_NOUN]"},
        // Step 4: Replace numbers
        {R"(\b\d+\b)", "[NUMBER]"},
    };

    std::string scrubbed = text;
    for (const MaskRule& rule : rules) {
        const std::regex compiled(rule.expression);
        scrubbed = std::regex_replace(scrubbed, compiled, rule.mask);
    }
    return scrubbed;
}
// Pipeline 4: Text formatting
// Applies, in order:
//   1. every line break (with surrounding whitespace) becomes a blank line;
//   2. a lowercase letter that follows '.', '!' or '?' is capitalized;
//   3. whitespace around ',', '.', '!' and '?' is normalized to
//      "no space before, one space after", and trailing whitespace is dropped
//      (this also absorbs line breaks that directly follow punctuation);
//   4. the first character is capitalized if it is a lowercase letter.
std::string FormattingPipeline(const std::string& text) {
    std::string result = text;

    // Step 1: Add paragraph breaks
    result = std::regex_replace(result, std::regex(R"(\s*\n\s*)"), "\n\n");

    // Step 2: Capitalize sentences. std::regex_replace only accepts a format
    // string, not a callback, so the matches are walked by hand. Group 1 is
    // the single letter that opens the next sentence.
    {
        const std::regex sentencePattern(R"([.!?]\s*([a-z]))");
        std::string capitalized;
        capitalized.reserve(result.size());
        auto copiedUpTo = result.cbegin();
        const std::sregex_iterator noMoreMatches;
        for (std::sregex_iterator it(result.cbegin(), result.cend(), sentencePattern);
             it != noMoreMatches; ++it) {
            const std::ssub_match& letter = (*it)[1];
            capitalized.append(copiedUpTo, letter.first);
            capitalized += static_cast<char>(std::toupper(static_cast<unsigned char>(*letter.first)));
            copiedUpTo = letter.second;
        }
        capitalized.append(copiedUpTo, result.cend());
        // Swap only after the loop: the iterators above point into `result`.
        result.swap(capitalized);
    }

    // Step 3: Ensure proper spacing around punctuation
    result = std::regex_replace(result, std::regex(R"(\s*([,.!?])\s*)"), "$1 ");
    result = std::regex_replace(result, std::regex(R"(\s+([,.!?])\s+)"), "$1 ");
    result = std::regex_replace(result, std::regex(R"(\s+$)"), "");

    // Step 4: Ensure first letter is capitalized
    // (<cctype> functions need an unsigned char value).
    if (!result.empty() && std::islower(static_cast<unsigned char>(result[0]))) {
        result[0] = static_cast<char>(std::toupper(static_cast<unsigned char>(result[0])));
    }
    return result;
}
// 6. Performance-optimized bulk replacement
// Builds a large space-separated word list, replaces five words with three
// different techniques, prints the elapsed milliseconds of each and finally
// reports whether all three produced the same text.
void PerformanceOptimizedReplacement() {
    std::cout << "\n=== Performance-Optimized Bulk Replacement ===" << std::endl;

    // Generate large test data
    const int SIZE = 10000;
    std::string testData;
    testData.reserve(SIZE * 20); // Pre-allocate
    const std::vector<std::string> words = {"apple", "banana", "cherry", "date", "elderberry"};
    for (int i = 0; i < SIZE; i++) {
        testData += words[i % words.size()];
        testData += ' ';
    }
    testData += "END";
    std::cout << "Generated test data with " << testData.length() << " characters" << std::endl;

    // Replacement map
    const std::unordered_map<std::string, std::string> replacements = {
        {"apple", "FRUIT"},
        {"banana", "TROPICAL"},
        {"cherry", "BERRY"},
        {"date", "PALM"},
        {"elderberry", "ANTIOXIDANT"}
    };

    // Method 1: Sequential find/replace (substring based, one pass per key)
    auto start1 = std::chrono::high_resolution_clock::now();
    std::string result1 = testData;
    for (const auto& [from, to] : replacements) {
        size_t pos = 0;
        while ((pos = result1.find(from, pos)) != std::string::npos) {
            result1.replace(pos, from.length(), to);
            pos += to.length();
        }
    }
    auto end1 = std::chrono::high_resolution_clock::now();
    auto duration1 = std::chrono::duration_cast<std::chrono::milliseconds>(end1 - start1);

    // Method 2: Build new string (more efficient for many replacements).
    // Characters are collected into a token until a delimiter is reached; only
    // the COMPLETE token is looked up, and the delimiter is emitted exactly
    // once. (Looking up after every character would match prefixes, and
    // appending the token that already holds the delimiter would double it.)
    auto start2 = std::chrono::high_resolution_clock::now();
    std::string result2;
    result2.reserve(testData.length() * 2); // Pre-allocate
    std::string current;
    const auto flushToken = [&]() {
        if (current.empty()) {
            return;
        }
        const auto it = replacements.find(current);
        result2 += (it != replacements.end()) ? it->second : current;
        current.clear();
    };
    for (const char c : testData) {
        if (c == ' ' || c == '\t' || c == '\n') {
            flushToken();
            result2 += c;
        } else {
            current += c;
        }
    }
    flushToken(); // Add remaining characters
    auto end2 = std::chrono::high_resolution_clock::now();
    auto duration2 = std::chrono::duration_cast<std::chrono::milliseconds>(end2 - start2);

    // Method 3: Using regex (usually slower but more flexible)
    auto start3 = std::chrono::high_resolution_clock::now();
    std::string result3 = testData;
    for (const auto& [from, to] : replacements) {
        // "\\b" is the regex word boundary; a bare "\b" in an ordinary string
        // literal is the backspace character and would never match.
        const std::regex pattern("\\b" + from + "\\b");
        result3 = std::regex_replace(result3, pattern, to);
    }
    auto end3 = std::chrono::high_resolution_clock::now();
    auto duration3 = std::chrono::duration_cast<std::chrono::milliseconds>(end3 - start3);

    std::cout << "Performance comparison:" << std::endl;
    std::cout << "Sequential find/replace: " << duration1.count() << " ms" << std::endl;
    std::cout << "Build new string: " << duration2.count() << " ms" << std::endl;
    std::cout << "Regex replacement: " << duration3.count() << " ms" << std::endl;

    // Verify all methods produce same result
    const bool sameResult = (result1 == result2) && (result2 == result3);
    std::cout << "All methods produce same result: " << (sameResult ? "Yes" : "No") << std::endl;
}
// Helpers defined further down in this file; declared here so the calls in
// AdvancedReplacementScenarios compile.
std::string FillTemplate(const std::string& templateStr, const std::map<std::string, std::string>& variables);
std::string ReplaceWithSmartQuotes(const std::string& text);
std::string EncodeUrl(const std::string& url);
std::string DecodeUrl(const std::string& encoded);
std::string FormatNumbers(const std::string& text);
std::string ProcessHtmlEntities(const std::string& html);

// 7. Advanced replacement scenarios
// Prints five demos: template filling, smart quotes, URL encode/decode round
// trip, thousands separators and HTML entity decoding.
void AdvancedReplacementScenarios() {
    std::cout << "\n=== Advanced Replacement Scenarios ===" << std::endl;

    // Scenario 1: Template variable replacement
    const std::string templateStr = "Hello {name}, your order #{order} is {status}. Total: ${amount}.";
    const std::map<std::string, std::string> variables = {
        {"{name}", "John Doe"},
        {"{order}", "12345"},
        {"{status}", "shipped"},
        {"{amount}", "99.99"}
    };
    const std::string filledTemplate = FillTemplate(templateStr, variables);
    std::cout << "Template filling:" << std::endl;
    std::cout << "Template: " << templateStr << std::endl;
    std::cout << "Filled: " << filledTemplate << std::endl;

    // Scenario 2: Smart quote replacement
    // (the inner double quotes must be escaped inside the literal)
    const std::string quotes = "This is 'a test' with \"smart quotes\" and 'single quotes'.";
    const std::string smartQuotes = ReplaceWithSmartQuotes(quotes);
    std::cout << "\nSmart quote replacement:" << std::endl;
    std::cout << "Original: " << quotes << std::endl;
    std::cout << "Smart: " << smartQuotes << std::endl;

    // Scenario 3: URL encoding/decoding replacement
    const std::string url = "https://example.com/search?q=hello world&category=C++ Programming";
    const std::string encoded = EncodeUrl(url);
    const std::string decoded = DecodeUrl(encoded);
    std::cout << "\nURL encoding/decoding:" << std::endl;
    std::cout << "Original: " << url << std::endl;
    std::cout << "Encoded: " << encoded << std::endl;
    std::cout << "Decoded: " << decoded << std::endl;

    // Scenario 4: Number formatting replacement
    const std::string numbers = "Values: 1234, 56.789, 0.42, 1000.00";
    const std::string formattedNumbers = FormatNumbers(numbers);
    std::cout << "\nNumber formatting:" << std::endl;
    std::cout << "Original: " << numbers << std::endl;
    std::cout << "Formatted: " << formattedNumbers << std::endl;

    // Scenario 5: HTML entity replacement.
    // The input has to contain entities for the decoder to have any effect.
    const std::string html = "5 &lt; 10 &amp;&amp; a &gt; b. Use &lt; and &gt; in HTML.";
    const std::string htmlEntities = ProcessHtmlEntities(html);
    std::cout << "\nHTML entity processing:" << std::endl;
    std::cout << "Original: " << html << std::endl;
    std::cout << "Processed: " << htmlEntities << std::endl;
}
// Fill template with variables.
// Replaces every occurrence of each placeholder (map key, e.g. "{name}") in
// the template with its value and returns the result. Placeholders are
// processed in the map's key order; text inserted for a placeholder is not
// rescanned for that same placeholder.
std::string FillTemplate(const std::string& templateStr, const std::map<std::string, std::string>& variables) {
    std::string result = templateStr;
    for (const auto& [placeholder, value] : variables) {
        // An empty placeholder matches at every position and would insert
        // `value` forever; skip it.
        if (placeholder.empty()) {
            continue;
        }
        size_t pos = 0;
        while ((pos = result.find(placeholder, pos)) != std::string::npos) {
            result.replace(pos, placeholder.length(), value);
            pos += value.length();  // resume after the inserted value
        }
    }
    return result;
}
// Replace straight quotes with smart quotes.
// Returns a copy of the text in which straight quotes are replaced by
// UTF-8 encoded curly quotes, alternating opening and closing:
//   * a single quote is converted unless it sits inside a double-quoted span
//     or is directly preceded by a backslash;
//   * a double quote is converted unless it sits inside a single-quoted span.
// Quotes that are not converted are copied unchanged.
std::string ReplaceWithSmartQuotes(const std::string& text) {
    // Curly quotes are three bytes each in UTF-8, so they cannot be stored in
    // a single char; the output is built as a new string instead of being
    // patched in place.
    static const char* const kLeftSingle = "\xE2\x80\x98";   // U+2018
    static const char* const kRightSingle = "\xE2\x80\x99";  // U+2019
    static const char* const kLeftDouble = "\xE2\x80\x9C";   // U+201C
    static const char* const kRightDouble = "\xE2\x80\x9D";  // U+201D

    std::string result;
    result.reserve(text.length());
    bool inSingleQuote = false;
    bool inDoubleQuote = false;
    for (size_t i = 0; i < text.length(); i++) {
        const char c = text[i];
        const bool escaped = (i > 0 && text[i - 1] == '\\');
        if (c == '\'' && !inDoubleQuote && !escaped) {
            result += inSingleQuote ? kRightSingle : kLeftSingle;
            inSingleQuote = !inSingleQuote;
        } else if (c == '"' && !inSingleQuote) {
            result += inDoubleQuote ? kRightDouble : kLeftDouble;
            inDoubleQuote = !inDoubleQuote;
        } else {
            result += c;
        }
    }
    return result;
}
// Simple URL encoding (basic implementation).
// Copies ASCII letters, digits and '-', '_', '.', '~' unchanged and writes
// every other byte as '%' followed by exactly two uppercase hex digits
// (a space therefore becomes "%20"). The whole input is encoded, including
// characters such as ':' and '/' that delimit URL components.
std::string EncodeUrl(const std::string& url) {
    static const char kHexDigits[] = "0123456789ABCDEF";

    std::string encoded;
    encoded.reserve(url.size());
    for (const char c : url) {
        // Work on the unsigned byte: <cctype> functions are undefined for
        // negative values, and the hex digits are taken from its two nibbles.
        const auto byte = static_cast<unsigned char>(c);
        const bool unreserved = (byte < 0x80 && std::isalnum(byte)) ||
                                c == '-' || c == '_' || c == '.' || c == '~';
        if (unreserved) {
            encoded += c;
        } else {
            // Always two digits, so bytes below 0x10 get a leading zero
            // ("%0A", not "%A") and DecodeUrl can read them back.
            encoded += '%';
            encoded += kHexDigits[byte >> 4];
            encoded += kHexDigits[byte & 0x0F];
        }
    }
    return encoded;
}
// Simple URL decoding.
// Turns each "%XX" (two hex digits, either case) into the byte it denotes
// and each '+' into a space; everything else is copied unchanged. A '%' that
// is not followed by two hex digits is malformed and is copied literally
// instead of throwing or being half-decoded.
std::string DecodeUrl(const std::string& encoded) {
    // Value of one hex digit, or -1 when the character is not a hex digit.
    const auto hexValue = [](char ch) -> int {
        if (ch >= '0' && ch <= '9') return ch - '0';
        if (ch >= 'a' && ch <= 'f') return ch - 'a' + 10;
        if (ch >= 'A' && ch <= 'F') return ch - 'A' + 10;
        return -1;
    };

    std::string decoded;
    decoded.reserve(encoded.length());
    for (size_t i = 0; i < encoded.length(); i++) {
        const char c = encoded[i];
        if (c == '%' && i + 2 < encoded.length()) {
            const int high = hexValue(encoded[i + 1]);
            const int low = hexValue(encoded[i + 2]);
            if (high >= 0 && low >= 0) {
                decoded += static_cast<char>((high << 4) | low);
                i += 2;  // skip the two digits just consumed
                continue;
            }
        }
        decoded += (c == '+') ? ' ' : c;
    }
    return decoded;
}
// Format numbers with thousands separators.
// Finds every free-standing number (digits with an optional ".digits"
// fraction) and inserts ',' between each group of three digits of its integer
// part; the fraction and all surrounding text are copied unchanged.
std::string FormatNumbers(const std::string& text) {
    // std::regex_replace accepts only a format string, not a callback, so the
    // matches are walked by hand. Groups: 1 = integer part, 2 = fraction.
    static const std::regex numberPattern(R"(\b(\d+)(\.\d+)?\b)");

    std::string result;
    result.reserve(text.size() + text.size() / 3);
    auto copiedUpTo = text.cbegin();
    const std::sregex_iterator noMoreMatches;
    for (std::sregex_iterator it(text.cbegin(), text.cend(), numberPattern); it != noMoreMatches; ++it) {
        const std::smatch& match = *it;
        result.append(copiedUpTo, match[0].first);

        // Add thousands separators to integer part: a comma goes before every
        // digit whose distance to the end is a positive multiple of three.
        const std::string integerPart = match[1].str();
        for (size_t i = 0; i < integerPart.size(); i++) {
            if (i != 0 && (integerPart.size() - i) % 3 == 0) {
                result += ',';
            }
            result += integerPart[i];
        }
        if (match[2].matched) {
            result += match[2].str();
        }
        copiedUpTo = match[0].second;
    }
    result.append(copiedUpTo, text.cend());
    return result;
}
// Process HTML entities.
// Decodes the five predefined entities (&lt; &gt; &quot; &apos; &amp;) into
// the characters they stand for and returns the result. Each entity in the
// input is decoded once: "&amp;lt;" becomes "&lt;", not "<".
std::string ProcessHtmlEntities(const std::string& html) {
    std::string result = html;
    // Define common HTML entities. The order matters, hence a sequence rather
    // than an unordered_map: "&amp;" is handled LAST so that the '&' it
    // produces cannot combine with following text into a second entity.
    const std::vector<std::pair<std::string, std::string>> entities = {
        {"&lt;", "<"},
        {"&gt;", ">"},
        {"&quot;", "\""},
        {"&apos;", "'"},
        {"&amp;", "&"}
    };
    for (const auto& [entity, character] : entities) {
        size_t pos = 0;
        while ((pos = result.find(entity, pos)) != std::string::npos) {
            result.replace(pos, entity.length(), character);
            pos += character.length();
        }
    }
    return result;
}
// Entry point of the string-replacement demo: prints a banner, runs every
// example in the listed order and reports any std::exception on std::cerr.
// Returns 0 when all examples finish, 1 if one of them throws.
int main() {
    using Demo = void (*)();
    // Run all replacement examples, in this order.
    const Demo demos[] = {
        BasicStringReplacement,
        RegexReplacement,
        ConditionalReplacement,
        MultipleReplacements,
        TransformationPipelines,
        PerformanceOptimizedReplacement,
        AdvancedReplacementScenarios,
    };

    std::cout << "=== C++ Windows String Processing - String Replacement ===" << std::endl;
    std::cout << "Demonstrating comprehensive string replacement and transformation techniques\n" << std::endl;

    int exitCode = 0;
    try {
        for (const Demo demo : demos) {
            demo();
        }
        std::cout << "\nAll string replacement examples completed successfully!" << std::endl;
    } catch (const std::exception& error) {
        std::cerr << "Unexpected error: " << error.what() << std::endl;
        exitCode = 1;
    }
    return exitCode;
}