DS and Algo in C++

FFT

Simple c++ implementation of the Cooley–Tukey algorithm for arrays whose size is a power of 2.

[1]:
#include <complex>
#include <vector>
#include <cmath>     // M_PI
#include <iostream>

void _fft(std::vector<std::complex<double> >& out, const std::vector<double>& x, const int& n)
{
    // assuming n is a power of 2
    if (n==1)
    {
        out.resize(1);
        out[0] = x[0];
    }
    else{
        std::vector<std::complex<double> > o(n/2), e(n/2);
        std::vector<double> x_odd, x_even;
        size_t i=0;
        while (i < n){
            x_even.push_back(x[i++]);
            x_odd.push_back(x[i++]);
        }
        _fft(e, x_even, n/2);
        _fft(o, x_odd, n/2);

        std::vector<std::complex<double> > w;
        for (size_t k=0 ; k<n/2 ; k++)
            w.push_back(std::exp(-2*M_PI*k*std::complex<double>(0., 1.)/((double)n)));

        out.resize(n);
        for (size_t k=0 ; k<n/2 ; k++)
            out[k] = e[k] + w[k]*o[k];
        for (size_t k=0 ; k<n/2 ; k++)
            out[k+n/2] = e[k] - w[k]*o[k];
    }
}


std::vector<double> x = {0, 1, 2, 3};
std::vector<std::complex<double> > out;

_fft(out, x, 4);

for (auto c : out)
    std::cout << c << ", ";
(6,0), (-2,2), (-2,0), (-2,-2),
[1]:
// by Eigen

#pragma cling add_include_path("/srv/conda/envs/notebook/include/eigen3/")

#include <unsupported/Eigen/FFT>
#include <vector>
#include <complex>
#include <iostream>

Eigen::FFT<double> fft;
std::vector<double> a = {0, 1, 2, 3};
std::vector<std::complex<double> > out;

fft.fwd(out, a);

for (auto c : out)
    std::cout << c << ", ";
(6,0), (-2,2), (-2,0), (-2,-2),

Huffman Coding in C++

  • 結果是對的但有 memory leaking,只有 new 沒有 delete,下面的 dtor 寫的不對

  • coding 結果和 python 不一樣,可能是因為 priority queue 處理同 freq 的 node 方法不同

  • original_message 是用來計算 frequency table 和建立 Huffman Tree 的,可以和要壓縮的字串不同

[1]:
#include <iostream>
#include <string>
#include <queue>       // std::priority_queue
#include <vector>
#include <utility>     // std::pair, std::swap
#include <functional>  // std::greater
#include <unordered_map>

class HeapNode
{
public:
    HeapNode* left = nullptr;
    HeapNode* right = nullptr;
    int freq;
    char ch;

    HeapNode(int freq, char ch, HeapNode* left, HeapNode* right)
        : freq(freq), ch(ch), left(left), right(right) {}

    HeapNode(const HeapNode& other) {  // copy ctor
        HeapNode* p = clone(&other);

        this->freq = p->freq;
        this->ch = p->ch;
        this->left = p->left;
        this->right = p->right;
    }

//     ~HeapNode() {
//         deleteNode(this->left);
//         deleteNode(this->right);
//     }

//     void deleteNode(const HeapNode* p) {
//         if (p){
//             deleteNode(p->left);
//             deleteNode(p->right);
//             delete p;
//         }
//     }

    HeapNode* clone(const HeapNode* other) {
        HeapNode* p = new HeapNode(other->freq, other->ch, nullptr, nullptr);

        if (other->left)
            p->left = clone(other->left);
        if (other->right)
            p->right = clone(other->right);

        return p;
    }

    friend bool operator>(const HeapNode& a, const HeapNode& b) {
        return a.freq > b.freq;
    }
};
[2]:
class HuffmanCoding{

    std::string original_message;
    std::vector<std::pair<std::string, std::string>> codeCharPairs;
    const HeapNode* huffmanTree;
    std::priority_queue<HeapNode, std::vector<HeapNode>, std::greater<HeapNode>>* q;

    void buildHuffmanTree()
    {
        std::unordered_map<char, int> freq;
        for (const char& c : original_message)
            freq[c]++;

        std::vector<HeapNode> nodes;
        for (const auto& pair : freq){
            char letter = pair.first;
            int frequency = pair.second;
            nodes.push_back(HeapNode(frequency, letter, nullptr, nullptr));
        }

        q = new std::priority_queue<HeapNode, std::vector<HeapNode>, std::greater<HeapNode>>(std::greater<HeapNode>(), nodes);

        while(q->size() > 1)
        {
            HeapNode* left = new HeapNode(q->top());
            q->pop();

            HeapNode* right = new HeapNode(q->top());
            q->pop();

            HeapNode* root = new HeapNode(left->freq + right->freq, '\0', left, right);
            q->push(*root);
        }
        huffmanTree = &(q->top());
    }

    std::vector<std::pair<std::string, std::string>> getCodeCharPairs(const HeapNode* p)
    {
        // if this weren't a member function it would have to be defined as an auto as the original signature wouldn't work in cling
        // this is a known issue; see https://github.com/jupyter-xeus/xeus-cling/issues/40

        if (p->ch)
        {
            std::string ch(1, p->ch);
            return std::vector<std::pair<std::string, std::string>> {{ch, ""}};
        }
        else
        {
            std::vector<std::pair<std::string, std::string>> from_left = getCodeCharPairs(p->left);
            std::vector<std::pair<std::string, std::string>> from_right = getCodeCharPairs(p->right);

            for (auto& pair : from_left)
                pair.second = '0' + pair.second;

            for (auto& pair : from_right)
                pair.second = '1' + pair.second;

            from_left.insert(from_left.end(), from_right.begin(), from_right.end());
            return from_left;
        }
    }

public:
    HuffmanCoding(const std::string& original_message) : original_message(original_message) {}

    std::string compress(const std::string& message){
        if (!huffmanTree)
            buildHuffmanTree();
        if (codeCharPairs.size()==0)
            codeCharPairs = getCodeCharPairs(huffmanTree);

        std::unordered_map<std::string, std::string> codeTable(codeCharPairs.begin(), codeCharPairs.end());

        std::string compressed;
        for (const char& c : message)
        {
            std::string ch(1, c);
            compressed += codeTable[ch];
        }
        return compressed;
    }
    std::string decompress(const std::string& compressed){
        if (!huffmanTree)
            buildHuffmanTree();
        if (codeCharPairs.size()==0)
            codeCharPairs = getCodeCharPairs(huffmanTree);

        for (auto& pair : codeCharPairs)
            std::swap(pair.first, pair.second);

        std::unordered_map<std::string, std::string> charTable(codeCharPairs.begin(), codeCharPairs.end());

        std::string::const_iterator it = compressed.begin();
        std::string code, message;

        while (it!=compressed.end())
        {
            code += *it;
            if (charTable.find(code) != charTable.end()){
                message += charTable[code];
                code = "";
            }
            it++;
        }
        return message;
    }
};

std::string original_message(R"(In computer science and information theory, a Huffman code is a particular type of optimal prefix code that is commonly used for lossless data compression. The process of finding or using such a code proceeds by means of Huffman coding, an algorithm developed by David A. Huffman while he was a Sc.D. student at MIT, and published in the 1952 paper "A Method for the Construction of Minimum-Redundancy Codes".)");

original_message
[2]:
"In computer science and information theory, a Huffman code is a particular type of optimal prefix code that is commonly used for lossless data compression. The process of finding or using such a code proceeds by means of Huffman coding, an algorithm developed by David A. Huffman while he was a Sc.D. student at MIT, and published in the 1952 paper "A Method for the Construction of Minimum-Redundancy Codes"."
[3]:
HuffmanCoding hc(original_message);

std::string message = original_message;
std::string compressed = hc.compress(message);
compressed
[3]:
"110110000110111110101010100011000010010000111001011011100111101001001100011011010110011101110110001011101000110101111010101101000101110001010010100110111000101010110010101011001011010011001110111111010111010010101111011110001011101101111101010100010110011101000011111011111110000011110110000101001101010010000010111101101110001010110100001100111101010111111101010000000101001000101110000111110000101101100101110100100111001111110101010001011001110001010100111000111101000011111110101010100011000110100110000010101101111001000111100001011110111101010110111000011010001100110000111000011001111100100111000101111111101010101000110000101101100001100110100101001100000011110000000010101100111100001011010101101011000011001111110101011111110111010001100010010001101101110111101010110111100100011010001101101110111001110010110100101011101111111101010100010110011110000101101010110101100110000100011111100110101011011110001110001110110001111110101011111101011101001010111101111000101110110111110101010001001000110110111010011001110111011011101110000111011101010101100100000101010100011110010110011011110110000001101010000110000101111001101010110111110110100111110111100100001011110011110000001111010111010010101111011110001011101101110000001010100100000011100111010101100111000000101110011111011111110011101011010000001110110100000011110011000110010001011000110000111101110001111010111111011000000000010011001110111011000101111000010010100110100001010000110101011000010111010001101110001010101100111110110010100111011100111000100111111111100000111100001100101101111101111110011110111010111111000001010101010001011110111101010110111000101010110011111011011101001100011000110110100101101000010100101001101111010101111110101111010001100100100011001010001110110011100111110110000101001001100010011101101101001011011111011011101000101100001111011111000001"
[4]:
std::string recovered = hc.decompress(compressed);
recovered
[4]:
"In computer science and information theory, a Huffman code is a particular type of optimal prefix code that is commonly used for lossless data compression. The process of finding or using such a code proceeds by means of Huffman coding, an algorithm developed by David A. Huffman while he was a Sc.D. student at MIT, and published in the 1952 paper "A Method for the Construction of Minimum-Redundancy Codes"."
[5]:
(recovered == message)
[5]:
true

Huffman Coding in C++, Smart Pointer Version

  • 還沒改好的 shared_ptr 版本

[1]:
#include <iostream>
#include <string>
#include <memory>      // std::shared_ptr, std::make_shared
#include <queue>       // std::priority_queue
#include <vector>
#include <utility>     // std::pair, std::swap
#include <functional>  // std::greater
#include <unordered_map>

class HeapNode
{
public:
    std::shared_ptr<HeapNode> left = nullptr;
    std::shared_ptr<HeapNode> right = nullptr;
    int freq;
    char ch;

    HeapNode(int freq, char ch, std::shared_ptr<HeapNode> left, std::shared_ptr<HeapNode> right)
        : freq(freq), ch(ch), left(left), right(right) {}

    HeapNode(const HeapNode& other) {  // copy ctor
        std::shared_ptr<HeapNode> p = clone(std::make_shared<HeapNode>(&other));

        this->freq = p->freq;
        this->ch = p->ch;
        this->left = p->left;
        this->right = p->right;
    }

//     ~HeapNode() {
//         deleteNode(this->left);
//         deleteNode(this->right);
//     }

//     void deleteNode(const HeapNode* p) {
//         if (p){
//             deleteNode(p->left);
//             deleteNode(p->right);
//             delete p;
//         }
//     }

    std::shared_ptr<HeapNode> clone(std::shared_ptr<HeapNode> other) {
        std::shared_ptr<HeapNode> p = std::make_shared<HeapNode>(other->freq, other->ch, nullptr, nullptr);

        if (other->left)
            p->left = clone(other->left);
        if (other->right)
            p->right = clone(other->right);

        return p;
    }

    friend bool operator>(const HeapNode& a, const HeapNode& b) {
        return a.freq > b.freq;
    }
};
In file included from input_line_5:1:
In file included from /srv/conda/envs/notebook/include/xeus/xinterpreter.hpp:13:
In file included from /usr/lib/gcc/x86_64-linux-gnu/7.5.0/../../../../include/c++/7.5.0/functional:54:
In file included from /usr/lib/gcc/x86_64-linux-gnu/7.5.0/../../../../include/c++/7.5.0/tuple:39:
In file included from /usr/lib/gcc/x86_64-linux-gnu/7.5.0/../../../../include/c++/7.5.0/array:39:
In file included from /usr/lib/gcc/x86_64-linux-gnu/7.5.0/../../../../include/c++/7.5.0/stdexcept:39:
In file included from /usr/lib/gcc/x86_64-linux-gnu/7.5.0/../../../../include/c++/7.5.0/string:41:
In file included from /usr/lib/gcc/x86_64-linux-gnu/7.5.0/../../../../include/c++/7.5.0/bits/allocator.h:46:
In file included from /usr/lib/gcc/x86_64-linux-gnu/7.5.0/../../../../include/x86_64-linux-gnu/c++/7.5.0/bits/c++allocator.h:33:
/usr/lib/gcc/x86_64-linux-gnu/7.5.0/../../../../include/c++/7.5.0/ext/new_allocator.h:136:23: error: no matching constructor for initialization of '__cling_N52::HeapNode'
        { ::new((void *)__p) _Up(std::forward<_Args>(__args)...); }
                             ^   ~~~~~~~~~~~~~~~~~~~~~~~~~~~
/usr/lib/gcc/x86_64-linux-gnu/7.5.0/../../../../include/c++/7.5.0/bits/alloc_traits.h:475:8: note: in instantiation of function template specialization '__gnu_cxx::new_allocator<__cling_N52::HeapNode>::construct<__cling_N52::HeapNode, const __cling_N52::HeapNode *>' requested here
        { __a.construct(__p, std::forward<_Args>(__args)...); }
              ^
/usr/lib/gcc/x86_64-linux-gnu/7.5.0/../../../../include/c++/7.5.0/bits/shared_ptr_base.h:526:30: note: in instantiation of function template specialization 'std::allocator_traits<std::allocator<__cling_N52::HeapNode> >::construct<__cling_N52::HeapNode, const __cling_N52::HeapNode *>' requested here
          allocator_traits<_Alloc>::construct(__a, _M_ptr(),
                                    ^
/usr/lib/gcc/x86_64-linux-gnu/7.5.0/../../../../include/c++/7.5.0/bits/shared_ptr_base.h:637:18: note: in instantiation of function template specialization 'std::_Sp_counted_ptr_inplace<__cling_N52::HeapNode, std::allocator<__cling_N52::HeapNode>, __gnu_cxx::_Lock_policy::_S_atomic>::_Sp_counted_ptr_inplace<const __cling_N52::HeapNode *>' requested here
          ::new (__mem) _Sp_cp_type(std::move(__a),
                        ^
/usr/lib/gcc/x86_64-linux-gnu/7.5.0/../../../../include/c++/7.5.0/bits/shared_ptr_base.h:1294:14: note: in instantiation of function template specialization 'std::__shared_count<__gnu_cxx::_Lock_policy::_S_atomic>::__shared_count<__cling_N52::HeapNode, std::allocator<__cling_N52::HeapNode>, const __cling_N52::HeapNode *>' requested here
        : _M_ptr(), _M_refcount(__tag, (_Tp*)0, __a,
                    ^
/usr/lib/gcc/x86_64-linux-gnu/7.5.0/../../../../include/c++/7.5.0/bits/shared_ptr.h:344:4: note: in instantiation of function template specialization 'std::__shared_ptr<__cling_N52::HeapNode, __gnu_cxx::_Lock_policy::_S_atomic>::__shared_ptr<std::allocator<__cling_N52::HeapNode>, const __cling_N52::HeapNode *>' requested here
        : __shared_ptr<_Tp>(__tag, __a, std::forward<_Args>(__args)...)
          ^
/usr/lib/gcc/x86_64-linux-gnu/7.5.0/../../../../include/c++/7.5.0/bits/shared_ptr.h:690:14: note: in instantiation of function template specialization 'std::shared_ptr<__cling_N52::HeapNode>::shared_ptr<std::allocator<__cling_N52::HeapNode>, const __cling_N52::HeapNode *>' requested here
      return shared_ptr<_Tp>(_Sp_make_shared_tag(), __a,
             ^
/usr/lib/gcc/x86_64-linux-gnu/7.5.0/../../../../include/c++/7.5.0/bits/shared_ptr.h:706:19: note: in instantiation of function template specialization 'std::allocate_shared<__cling_N52::HeapNode, std::allocator<__cling_N52::HeapNode>, const __cling_N52::HeapNode *>' requested here
      return std::allocate_shared<_Tp>(std::allocator<_Tp_nc>(),
                  ^
input_line_8:13:50: note: in instantiation of function template specialization 'std::make_shared<__cling_N52::HeapNode, const __cling_N52::HeapNode *>' requested here
        std::shared_ptr<HeapNode> p = clone(std::make_shared<HeapNode>(&other));
                                                 ^
input_line_8:12:5: note: candidate constructor not viable: no known conversion from 'const __cling_N52::HeapNode *' to 'const __cling_N52::HeapNode' for 1st argument; dereference the argument with *
    HeapNode(const HeapNode& other) {  // copy ctor
    ^
input_line_8:9:5: note: candidate constructor not viable: requires 4 arguments, but 1 was provided
    HeapNode(int freq, char ch, std::shared_ptr<HeapNode> left, std::shared_ptr<HeapNode> right)
    ^
Interpreter Error:
[8]:
class HuffmanCoding{

    std::string original_message;
    std::vector<std::pair<std::string, std::string>> codeCharPairs;
    const std::shared_ptr<HeapNode> huffmanTree;
    std::shared_ptr<std::priority_queue<HeapNode, std::vector<HeapNode>, std::greater<HeapNode>>> q;

    void buildHuffmanTree()
    {
        std::unordered_map<char, int> freq;
        for (const char& c : original_message)
            freq[c]++;

        std::vector<HeapNode> nodes;
        for (const auto& pair : freq){
            char letter = pair.first;
            int frequency = pair.second;
            nodes.push_back(HeapNode(frequency, letter, nullptr, nullptr));
        }

        q = std::make_shared<std::priority_queue<HeapNode, std::vector<HeapNode>, std::greater<HeapNode>>>(std::greater<HeapNode>(), nodes);

        while(q->size() > 1)
        {
            std::shared_ptr<HeapNode> left = std::make_shared<HeapNode>(q->top());
            q->pop();

            std::shared_ptr<HeapNode> right = std::make_shared<HeapNode>(q->top());
            q->pop();

            std::shared_ptr<HeapNode> root = std::make_shared<HeapNode>(left->freq + right->freq, '\0', left, right);
            q->push(*root);
        }
        huffmanTree = &(q->top());
    }

    std::vector<std::pair<std::string, std::string>> getCodeCharPairs(const std::shared_ptr<HeapNode> p)
    {
        // if this weren't a member function it would have to be defined as an auto as the original signature wouldn't work in cling
        // this is a known issue; see https://github.com/jupyter-xeus/xeus-cling/issues/40

        if (p->ch)
        {
            std::string ch(1, p->ch);
            return std::vector<std::pair<std::string, std::string>> {{ch, ""}};
        }
        else
        {
            std::vector<std::pair<std::string, std::string>> from_left = getCodeCharPairs(p->left);
            std::vector<std::pair<std::string, std::string>> from_right = getCodeCharPairs(p->right);

            for (auto& pair : from_left)
                pair.second = '0' + pair.second;

            for (auto& pair : from_right)
                pair.second = '1' + pair.second;

            from_left.insert(from_left.end(), from_right.begin(), from_right.end());
            return from_left;
        }
    }

public:
    HuffmanCoding(const std::string& original_message) : original_message(original_message) {}

    std::string compress(const std::string& message){
        if (!huffmanTree)
            buildHuffmanTree();
        if (codeCharPairs.size()==0)
            codeCharPairs = getCodeCharPairs(huffmanTree);

        std::unordered_map<std::string, std::string> codeTable(codeCharPairs.begin(), codeCharPairs.end());

        std::string compressed;
        for (const char& c : message)
        {
            std::string ch(1, c);
            compressed += codeTable[ch];
        }
        return compressed;
    }
    std::string decompress(const std::string& compressed){
        if (!huffmanTree)
            buildHuffmanTree();
        if (codeCharPairs.size()==0)
            codeCharPairs = getCodeCharPairs(huffmanTree);

        for (auto& pair : codeCharPairs)
            std::swap(pair.first, pair.second);

        std::unordered_map<std::string, std::string> charTable(codeCharPairs.begin(), codeCharPairs.end());

        std::string::const_iterator it = compressed.begin();
        std::string code, decompressed;

        while (it!=compressed.end())
        {
            code += *it;
            if (charTable.find(code) != charTable.end()){
                decompressed += charTable[code];
                code = "";
            }
            it++;
        }
        return decompressed;
    }
};

std::string original_message(R"(In computer science and information theory, a Huffman code is a particular type of optimal prefix code that is commonly used for lossless data compression. The process of finding or using such a code proceeds by means of Huffman coding, an algorithm developed by David A. Huffman while he was a Sc.D. student at MIT, and published in the 1952 paper "A Method for the Construction of Minimum-Redundancy Codes".)");

original_message
[8]:
"In computer science and information theory, a Huffman code is a particular type of optimal prefix code that is commonly used for lossless data compression. The process of finding or using such a code proceeds by means of Huffman coding, an algorithm developed by David A. Huffman while he was a Sc.D. student at MIT, and published in the 1952 paper "A Method for the Construction of Minimum-Redundancy Codes"."
[9]:
HuffmanCoding hc(original_message);

std::string message = original_message;
std::string compressed = hc.compress(message);
compressed
[9]:
"110110000110111110101010100011000010010000111001011011100111101001001100011011010110011101110110001011101000110101111010101101000101110001010010100110111000101010110010101011001011010011001110111111010111010010101111011110001011101101111101010100010110011101000011111011111110000011110110000101001101010010000010111101101110001010110100001100111101010111111101010000000101001000101110000111110000101101100101110100100111001111110101010001011001110001010100111000111101000011111110101010100011000110100110000010101101111001000111100001011110111101010110111000011010001100110000111000011001111100100111000101111111101010101000110000101101100001100110100101001100000011110000000010101100111100001011010101101011000011001111110101011111110111010001100010010001101101110111101010110111100100011010001101101110111001110010110100101011101111111101010100010110011110000101101010110101100110000100011111100110101011011110001110001110110001111110101011111101011101001010111101111000101110110111110101010001001000110110111010011001110111011011101110000111011101010101100100000101010100011110010110011011110110000001101010000110000101111001101010110111110110100111110111100100001011110011110000001111010111010010101111011110001011101101110000001010100100000011100111010101100111000000101110011111011111110011101011010000001110110100000011110011000110010001011000110000111101110001111010111111011000000000010011001110111011000101111000010010100110100001010000110101011000010111010001101110001010101100111110110010100111011100111000100111111111100000111100001100101101111101111110011110111010111111000001010101010001011110111101010110111000101010110011111011011101001100011000110110100101101000010100101001101111010101111110101111010001100100100011001010001110110011100111110110000101001001100010011101101101001011011111011011101000101100001111011111000001"
[10]:
std::string recovered = hc.decompress(compressed);
recovered
[10]:
"In computer science and information theory, a Huffman code is a particular type of optimal prefix code that is commonly used for lossless data compression. The process of finding or using such a code proceeds by means of Huffman coding, an algorithm developed by David A. Huffman while he was a Sc.D. student at MIT, and published in the 1952 paper "A Method for the Construction of Minimum-Redundancy Codes"."
[11]:
(recovered == message)
[11]:
true