taksatouの日記: 2010/06

2010-06-27

ddclientメモ

以前から使っていたDiCEではここ最近になってなぜかIPの更新ができなくなってしまったので、DynDNS.comの説明を参考にしてddclientを導入した。

ダイナミックDNSサービスはDynDNSを利用
https://www.dyndns.com/

IPアドレス更新クライアントについて
https://www.dyndns.com/support/clients/

Linux / UNIX Update Clients
https://www.dyndns.com/support/clients/unix.html

上側に書いてあったのでddclientを採用。
主要なOSのバイナリは配布されている。
debianなら導入はapt-getでも可。

DynDNSを使っていれば設定ファイルは以下から自動で生成できる
https://www.dyndns.com/support/tools/clientconfig.html

なかみはこんな感じ

## ddclient configuration file
daemon=600
# check every 600 seconds
syslog=yes
# log update msgs to syslog
mail-failure=メールアドレス # Mail failed updates to user
pid=/var/run/ddclient.pid
# record PID in file.
## Detect IP with our CheckIP server
use=web, web=checkip.dyndns.com/, web-skip='IP Address'
## DynDNS username and password here
login=ログイン名
password=パスワード
## Default options
protocol=dyndns2
server=members.dyndns.org
## Dynamic DNS hosts
ホスト名

/etc/ddclient.confに書いて、/etc/init.d/ddclient restart で完了

libmemcachedについてるlibhashkitであそぶ

libmemcachedにlibhashkitなんていう便利そうなライブラリがついていたので、各種ハッシュアルゴリズムの特性を調べてみた。
http://libmemcached.org/libMemcached.html

libhashkitでつかえるアルゴリズムは以下の通り。(libhashkit/types.h参照)
それぞれのアルゴリズムの説明は検索すればでてくるので省略。

HASHKIT_HASH_MD5,
HASHKIT_HASH_CRC,
HASHKIT_HASH_FNV1_64,
HASHKIT_HASH_FNV1A_64,
HASHKIT_HASH_FNV1_32,
HASHKIT_HASH_FNV1A_32,
HASHKIT_HASH_HSIEH,
HASHKIT_HASH_MURMUR,
HASHKIT_HASH_JENKINS,

テスト方法は、互いに異なる文字列のdigest値をそれぞれ計算して、conflictが発生したかどうか、標準偏差、値の分布のヒストグラムをダンプするだけ。

作成したコードは以下の通り。

#include <boost/lambda/lambda.hpp>
#include <boost/lambda/bind.hpp>

#include <libhashkit/hashkit.h>
#include <iostream>
#include <algorithm>
#include <vector>
#include <set>
#include <map>
#include <stdlib.h>
#include <math.h>

using namespace std;

//#define TEST_RAND
#define TEST_CASES 10000
#define LETTER_NUM 62
static char letters[LETTER_NUM+1] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";

static string rand_str(int len, unsigned int seed) {
    char buf[len+1];
    
    srand(seed + 1);
    for (int i = 0; i < len; i++) {
        buf[i] = letters[(int)((double)LETTER_NUM * rand() / (RAND_MAX + 1.0))];
    }
    buf[len] = '\0';
    return string(buf);
}

static set<string> rand_strings;

static void gen_rand_strings(int n)
{
    rand_strings.erase(rand_strings.begin(), rand_strings.end());
    
    for (int i = 0; i < n; ++i) {
        string s = rand_str(10, i);
        
        if (!rand_strings.insert(s).second) {
            cerr << "<" << s << "> is known!" << endl;
            ++n;
        }
    }
}
    
 

class TestHash {
private:
    multiset<uint32_t> results;
    Hashkit h;
    hashkit_hash_algorithm_t hash_type;
    int cases;

    void init(void) {
        results.erase(results.begin(), results.end());
    }
    
public:
    TestHash(int n, hashkit_hash_algorithm_t t) : cases(n), hash_type(t) {
        h.set_function(t);
    }
    
    void byRand(void) {
        init();
        
        for (set<string>::iterator it = rand_strings.begin(); it != rand_strings.end(); ++it) {
            string s = *it;
            results.insert(h.digest(s));
        }
    }
    
    void bySeq(void) {
        init();
        char buf[256];
        
        for (int i = 0; i < cases; ++i) {
            snprintf(buf, 256, "%010d", i);
            string s(buf);
            results.insert(h.digest(s));
        }
    }


    int conflict(void) {
        uint32_t last = -1;
        int c = 0;
        
        for (multiset<uint32_t>::iterator it = results.begin(); it != results.end(); ++it) {
            if (last == *it)
                ++c;
            last = *it;
        }
        return c;
    }

    double average(void) {        
        double sum = 0.0;

        for_each(results.begin(), results.end(), sum += boost::lambda::_1);
        return sum / cases;
    }

    double variance() {
        double avr = average();
        double sum = 0.0;

        for_each(results.begin(), results.end(), sum += bind(pow, avr - boost::lambda::_1, 2));
        return sum / cases;
    }

    double deviation(void) {
        return sqrt(variance());
    }

    vector<int> histogram(int n) {
        uint32_t max = *max_element(results.begin(), results.end());
        vector<int> ret;

        ret.assign(n+1, 0);
        for (multiset<uint32_t>::iterator it = results.begin(); it != results.end(); ++it) {
            ret[(int)((double) *it * n / max)] += 1;
        }
        ret.pop_back(); // remove max element
        return ret;
    }
    
    void dump(void) {
        cout << "conflict: " << conflict() << endl;
        //        cout << "average: " << average() << endl;
        //        cout << "variance: " << variance() << endl;
        cout << "deviation: " << (long)deviation() << endl;
        vector<int> ret = histogram(10);
        cout << "histogram: " << endl;
        for_each(ret.begin(), ret.end(), cout << boost::lambda::_1 << "\n");
    }

};

void exec_test(pair<hashkit_hash_algorithm_t, string> info)
{
    TestHash test(TEST_CASES, info.first);

    cout << ">>" << info.second << endl;
#ifdef TEST_RAND
    test.byRand();
    test.dump();
#else
    test.bySeq();
    test.dump();
#endif
    cout << endl;;

}

int main(void)
{
    map<hashkit_hash_algorithm_t, string> types;

    types.insert(make_pair(HASHKIT_HASH_MD5, "HASHKIT_HASH_MD5"));
//     types.insert(make_pair(HASHKIT_HASH_CRC, "HASHKIT_HASH_CRC"));
    types.insert(make_pair(HASHKIT_HASH_FNV1_64, "HASHKIT_HASH_FNV1_64"));
//     types.insert(make_pair(HASHKIT_HASH_FNV1A_64, "HASHKIT_HASH_FNV1A_64"));
//     types.insert(make_pair(HASHKIT_HASH_FNV1_32, "HASHKIT_HASH_FNV1_32"));
//     types.insert(make_pair(HASHKIT_HASH_FNV1A_32, "HASHKIT_HASH_FNV1A_32"));
    types.insert(make_pair(HASHKIT_HASH_HSIEH, "HASHKIT_HASH_HSIEH"));
    types.insert(make_pair(HASHKIT_HASH_MURMUR, "HASHKIT_HASH_MURMUR"));
    types.insert(make_pair(HASHKIT_HASH_JENKINS, "HASHKIT_HASH_JENKIN"));
    gen_rand_strings(TEST_CASES);
    
    for_each(types.begin(), types.end(), bind(exec_test, boost::lambda::_1));
    return 0;
}

libmemcachedのバージョンは0.4.0、boostは1.34.1その他環境は以下の通り

% c++ -v
Using built-in specs.
Target: x86_64-linux-gnu
Configured with: ../src/configure -v --with-pkgversion='Debian 4.3.2-1.1' --with-bugurl=file:///usr/share/doc/gcc-4.3/README.Bugs --enable-languages=c,c++,fortran,objc,obj-c++ --prefix=/usr --enable-shared --with-system-zlib --libexecdir=/usr/lib --without-included-gettext --enable-threads=posix --enable-nls --with-gxx-include-dir=/usr/include/c++/4.3 --program-suffix=-4.3 --enable-clocale=gnu --enable-libstdcxx-debug --enable-objc-gc --enable-mpfr --enable-cld --enable-checking=release --build=x86_64-linux-gnu --host=x86_64-linux-gnu --target=x86_64-linux-gnu
Thread model: posix
gcc version 4.3.2 (Debian 4.3.2-1.1)
% uname -a
Linux chameleon 2.6.26-2-amd64 #1 SMP Thu Feb 11 00:59:32 UTC 2010 x86_64 GNU/Linux

ビルドは以下コマンド

c++ -lhashkit hash.cc

ランダム文字列(10文字)の場合。(データが長くなるので全部のアルゴリズムの結果は載せてません)

>>HASHKIT_HASH_MD5
conflict: 0
deviation: 1238971077
histogram:
1051
1004
1026
1028
1019
981
996
953
949
992

>>HASHKIT_HASH_FNV1_64
conflict: 0
deviation: 1243801177
histogram:
1027
977
1002
1020
995
985
976
988
1033
996

>>HASHKIT_HASH_HSIEH
conflict: 0
deviation: 1235906590
histogram:
945
1017
999
981
1034
1036
1002
954
1005
1026

>>HASHKIT_HASH_MURMUR
conflict: 0
deviation: 1238293295
histogram:
980
964
1008
958
1015
997
1004
1013
1069
991

>>HASHKIT_HASH_JENKIN
conflict: 0
deviation: 1236833209
histogram:
975
941
1028
1019
967
1028
966
1037
1002
1036

予想通りどのアルゴリズムもそれなりにきれいに分散しているように見える。
10万個程度までならほとんど衝突もない。

数字の場合。(e.g. 0000000001, 0000000002, ...)

>>HASHKIT_HASH_MD5
conflict: 0
deviation: 1242254142
histogram:
1046
985
1031
1072
974
1003
968
938
996
986

>>HASHKIT_HASH_FNV1_64
conflict: 0
deviation: 401256730
histogram:
0
0
0
0
0
0
4000
4000
0
1999

>>HASHKIT_HASH_HSIEH
conflict: 0
deviation: 1239810127
histogram:
1031
1029
1014
1032
1058
985
956
934
959
1001

>>HASHKIT_HASH_MURMUR
conflict: 0
deviation: 1238208481
histogram:
999
971
989
989
1005
1034
1000
987
1007
1018

>>HASHKIT_HASH_JENKIN
conflict: 0
deviation: 1223190204
histogram:
933
1014
1023
989
1052
1006
1006
1022
996
958

HASHKIT_HASH_FNV1_64の偏りがひどい。（衝突はしていない）
ほかのものはそれなりの性能を発揮しているように見える。

それぞれのアルゴリズムで衝突しやすいパターンをもっと探してみようと思ったけど疲れたのでまた今度。

2010-06-23

GoogleCLをdebianで試す

ちょっと便利そうなものがでてたので試してみました。

http://japan.cnet.com/news/service/story/0,3800104747,20415466,00.htm
http://code.google.com/p/googlecl/

まずプロジェクトページからダウンロード～普通にインストール

wget http://googlecl.googlecode.com/files/googlecl-0.9.7.tar.gz
tar xzf googlecl-0.9.7.tar.gz
cd googlecl-0.9.7
sudo python setup.py install

gdataが必要らしいので同様にインストール
http://code.google.com/p/gdata-python-client/

wget http://gdata-python-client.googlecode.com/files/gdata-2.0.10.tar.gz
tar xzf gdata-2.0.10.tar.gz
cd gdata-2.0.10
sudo python setup.py install

debianの場合は起動するpythonによって/usr/binにインストールされる場合と/usr/local/binになる場合があるので、エラーがでた場合は要確認。

ためしにbloggerにポストしてみます。
実際この記事は以下のコマンドでポストしました。

google blogger post --title "GoogleCLをdebianで試す" "`cat blog`"

複数ブログがある場合、このコマンドの後に番号で選択します。
なお、初回起動時はOAuthの認証が必要になります。
ブラウザがインストールされていれば自動で起動します。

設定をすればemacsとかでbloggerに投稿したりできるので便利です。

2010-06-21

bloggerのテンプレートの出し分けをする方法のメモ

bloggerのトップページと個別の記事ページでテンプレートの出し分けをしたくなったのでやり方をメモ。

<b:if cond='data:blog.pageType == "item"'>
foo
</b:if>

これでfooの部分が個別記事のなかでのみ有効になる

2010-06-05

AMQP関連の参考リンクまとめ

最近RabbitMQをよくいじっているので、参考にした記事などをまとめておきます。

RabbitMQとはerlangで実装されたAMQPの一つです。
http://www.rabbitmq.com/

概要をつかむには以下のスライドが参考になります。サンプルコードものってます。
http://www.slideshare.net/hungryblank/rabbitmq-with-python-and-ruby-rupy-2009

英語ですがこちらのブログ記事はチュートリアル的に書かれていてとても参考になりました
http://blogs.digitar.com/jjww/2009/01/rabbits-and-warrens/

下記のブログでもRabbitMQ関連の記事がよくポストされてます。
http://somic.org/category/rabbitmq/

greeのエンジニアブログでも最近紹介されてました
http://labs.gree.jp/blog/2010/06/262/

各種言語にAMQPライブラリが用意されてるのでそれを使うのが便利です。

pythonなら知る限りでは3種類のライブラリがあります。

py-amqplib：http://code.google.com/p/py-amqplib/

シンプルに使うならこれで大体のことはできるはずです。

pika：http://github.com/tonyg/pika

RabbitMQの開発者がつくってるようです。
asyncなのとsyncなコネクションのアダプターがついてるのでこれを使えばどんなアプリケーションでも柔軟に対応できます。tornadoのioloopと組み合わせたりも簡単にできます。

txAMQP：https://launchpad.net/txamqp

twistedベースの実装。twistedは使ったことがないのでノーコメント。

rubyならgem、perlならcpanにあります。詳細は省略しますが、その他の言語でも大体用意されてるようです。
くわしくRabbitMQのドキュメントにあるのでそちらをどうぞ
http://www.rabbitmq.com/how.html

その他、メーリングリストでの議論