diff --git a/src/antifreq.cpp b/src/antifreq.cpp
new file mode 100644
index 0000000..3d3384e
--- /dev/null
+++ b/src/antifreq.cpp
@@ -0,0 +1,80 @@
+#include <algorithm>
+#include <fstream>
+#include <iostream>
+#include <locale>
+#include <regex>
+#include <string>
+#include <unordered_map>
+#include <utility>
+#include <vector>
+
+// Orders (word, count) pairs by descending count; ties are broken by
+// ascending word so the output order is deterministic.
+bool SortByVal(const std::pair<std::string, int> &a, const std::pair<std::string, int> &b)
+{
+    if (a.second == b.second)
+    {
+        return a.first < b.first;
+    }
+    return a.second > b.second;
+}
+
+// Counts case-insensitive occurrences of ASCII-letter words in argv[1] and
+// writes one "word count" line per word to argv[2], most frequent first.
+int main(int argc, char *argv[])
+{
+    if (argc != 3)
+    {
+        std::cout << "Example: " << argv[0] << " in.txt out.txt" << std::endl;
+        return 1;
+    }
+
+    std::ifstream fIn(argv[1]);
+    if (!fIn)
+    {
+        std::cerr << "Cannot open input file: " << argv[1] << std::endl;
+        return 1;
+    }
+
+    // The optimize flag only takes effect when passed to the constructor.
+    const std::regex re("[a-z]+", std::regex_constants::icase | std::regex_constants::optimize);
+    const std::locale loc;
+
+    std::unordered_map<std::string, int> dict;
+    std::string line;
+
+    while (std::getline(fIn, line))
+    {
+        // Walking the matches in place avoids re-copying the remainder of
+        // the line after every word.
+        for (std::sregex_iterator it(line.begin(), line.end(), re), end; it != end; ++it)
+        {
+            std::string word = it->str();
+
+            for (auto &chr : word)
+            {
+                chr = std::tolower(chr, loc);
+            }
+
+            // operator[] value-initializes a missing count to 0.
+            ++dict[word];
+        }
+    }
+
+    std::vector<std::pair<std::string, int>> sortedList(dict.begin(), dict.end());
+    std::sort(sortedList.begin(), sortedList.end(), SortByVal);
+
+    std::ofstream fOut(argv[2]);
+    if (!fOut)
+    {
+        std::cerr << "Cannot open output file: " << argv[2] << std::endl;
+        return 1;
+    }
+
+    for (const auto &elem : sortedList)
+    {
+        fOut << elem.first << " " << elem.second << '\n';
+    }
+
+    return 0;
+}
diff --git a/src/freq03.py b/src/freq03.py
new file mode 100644
index 0000000..3ff5aa1
--- /dev/null
+++ b/src/freq03.py
@@ -0,0 +1,22 @@
+import re
+import sys
+from collections import defaultdict
+
+
+if len(sys.argv) != 3:
+    print("Example: " + sys.argv[0] + " in.txt out.txt")
+    sys.exit(1)
+
+# Case-insensitive counts of every run of ASCII letters in the input.
+words = defaultdict(int)
+pattern = re.compile(r'[a-zA-Z]+')
+
+with open(sys.argv[1], 'r', encoding='utf-8', errors='ignore') as f:
+    for line in f:
+        for w in pattern.findall(line):
+            words[w.lower()] += 1
+
+# Most frequent first; ties are broken alphabetically.
+with open(sys.argv[2], 'w', encoding='utf-8', errors='ignore') as result:
+    for w, c in sorted(words.items(), key=lambda item: (-item[1], item[0])):
+        result.write('%s %d\n' % (w, c))