• 2011-11-22 12:00:00

    Fast serialization of data in PHP

    Serializing/Unserializing data

    Serialization is the process of converting a data structure or object state into a format that can be stored and "resurrected" later in the same or another computer environment. source

    There are a lot of areas where one can use serialization. A couple are:

    • in a database (storing an array of options specific to the user), 
    • in an AJAX enabled application (call to get a status update and display to the user without refreshing the whole page), etc.

    Depending on the application, serializing and unserializing data can be a very intensive process and can have a big impact on the performance of the overall system.

    Options

    The most obvious options for serializing and unserializing data are the serialize and unserialize PHP functions. A bit less popular are json_encode and json_decode. There is also a third option: a third-party module that one can easily install on their server. This module is called igbinary.

    In this blog post I am comparing the three options, in the hope that it will aid you with your selection of the best option for you so as to increase the performance of your application.

    I created a test script that used several arrays of data (strings, integers, floats, booleans, objects, mixed data, and all of the data types combined) to test the speed and size of the serialization and the speed of unserialization of each of the three candidate function pairs. I ran each function 1,000,000 times on the same data to serialize or unserialize it, so as to produce the results below.

    The script that I have used is listed below:

    /**
     * Fixture data: one small array per data type being benchmarked.
     */

    // NOTE: the key 'AZ' appears twice below. PHP keeps only the last value
    // for a repeated key, so this array holds four entries and 'Arizona' is
    // overwritten by 'West Virginia'. The second key was presumably meant to
    // be 'WV'; it is left untouched so the sizes measured from this script
    // (105 bytes for serialize(), 67 bytes for json_encode()) stay
    // reproducible.
    $_testStrings = array(
        'AK' => 'Alaska',   'AZ' => 'Arizona', 'VT' => 'Vermont',
        'VA' => 'Virginia', 'AZ' => 'West Virginia',
    );

    // Leading terms of the Fibonacci sequence. The twelfth value is 89 (the
    // original listing had 84, which is not a Fibonacci number); both are
    // two-digit integers, so the encoded sizes are unaffected.
    $_testIntegers = array(0, 1, 1, 2, 3, 5, 8, 13, 21, 34, 55, 89, 144,);

    $_testBooleans = array(TRUE, TRUE, FALSE, FALSE, FALSE, TRUE, TRUE,);

    // The first element is the integer literal 0, not 0.0.
    $_testFloats = array(
        0, 1.1, 1.1, 2.22, 3.33, 5.55, 8.88, 13.13, 21.2121, 34.3434,
        55.5555, 84.8484, 144.144,
    );

    // Mixed keys and values. 'one' is stored under key 0 and then replaced by
    // the explicit `0 => 25.46`; the key TRUE is cast to the integer 1. The
    // resulting array therefore has five entries.
    $_testMixed = array(
        'one', 13 => 'two', 0 => 25.46, 'four' => 0.007,
        'five' => TRUE, TRUE => 42,
    );

    // Two plain objects: one with string properties only, one that also
    // carries the integer array above.
    $_objectOne             = new stdClass();
    $_objectOne->firstname  = 'Leroy';
    $_objectOne->lastname   = 'Jenkins';
    $_objectOne->profession = 'Gamer';
    $_objectOne->status     = 'Legend';

    $_objectTwo         = new stdClass();
    $_objectTwo->series = 'Fibonacci';
    $_objectTwo->data   = $_testIntegers;

    $_testObjects = array($_objectOne, $_objectTwo,);

    /**
     * Benchmark configuration
     */

    // Number of times each function is called per data set.
    $_maxLoop = 1000000;

    // sprintf() templates: function label, data set name, [size,] elapsed seconds.
    $_templateEncode = "%s [%s]: Size: %s bytes, %s time to encode\r\n";
    $_templateDecode = "%s [%s]: %s time to decode\r\n";

    // The full run takes far longer than the default execution time limit.
    set_time_limit(0);

    // Accumulates the report that is printed at the end of the script.
    $_output = '';

    /**
     * Set the source arrays
     */

    // Every data set combined into a single nested array.
    $_allTestData = array(
        'str' => $_testStrings,
        'int' => $_testIntegers,
        'bln' => $_testBooleans,
        'flt' => $_testFloats,
        'mix' => $_testMixed,
        'obj' => $_testObjects,
    );

    // Data set name (used in the report) => data to encode/decode.
    $_testSources = array(
        'strings'  => $_testStrings,
        'integers' => $_testIntegers,
        'booleans' => $_testBooleans,
        'floats'   => $_testFloats,
        'mixed'    => $_testMixed,
        'objects'  => $_testObjects,
        'all'      => $_allTestData,
    );
    
    /**
     * ENCODE DATA
     *
     * For every data set, call each encoder $_maxLoop times, then record the
     * size of its output and the wall-clock seconds the loop took.
     */
    foreach ($_testSources as $_label => $_payload)
    {
        /**
         * serialize()
         */
        $_began = microtime(TRUE);

        for ($_i = 0; $_i < $_maxLoop; $_i++)
        {
            serialize($_payload);
        }

        $_elapsed = microtime(TRUE) - $_began;

        // One extra call outside the timed loop, only to measure the size.
        $_encoded = serialize($_payload);

        $_output .= sprintf(
            $_templateEncode,
            'serialize()',
            $_label,
            strlen($_encoded),
            $_elapsed
        );

        /**
         * json_encode()
         */
        $_began = microtime(TRUE);

        for ($_i = 0; $_i < $_maxLoop; $_i++)
        {
            json_encode($_payload);
        }

        $_elapsed = microtime(TRUE) - $_began;

        $_encoded = json_encode($_payload);

        $_output .= sprintf(
            $_templateEncode,
            'json_encode()',
            $_label,
            strlen($_encoded),
            $_elapsed
        );

        /**
         * igbinary_serialize()
         */
        $_began = microtime(TRUE);

        for ($_i = 0; $_i < $_maxLoop; $_i++)
        {
            igbinary_serialize($_payload);
        }

        $_elapsed = microtime(TRUE) - $_began;

        $_encoded = igbinary_serialize($_payload);

        $_output .= sprintf(
            $_templateEncode,
            'igbinary_serialize()',
            $_label,
            strlen($_encoded),
            $_elapsed
        );

        // Separator between data sets.
        $_output .= str_repeat('=', 20) . "\r\n";
    }

    // Separator between the encode and decode sections of the report.
    $_output .= str_repeat('=:=', 20) . "\r\n";
    
    
    /**
     * DECODE DATA
     *
     * For every data set, encode it once with each encoder (outside the
     * timed section), then time $_maxLoop calls of the matching decoder.
     */
    foreach ($_testSources as $_area => $_source)
    {
        /**
         * unserialize()
         */

        // Prepare the payload first so only decoding is timed.
        $_data = serialize($_source);

        $_serializeStart = microtime(TRUE);

        for ($_counter = 0; $_counter < $_maxLoop; $_counter++)
        {
            unserialize($_data);
        }

        $_serializeEnd = microtime(TRUE);

        $_output .= sprintf(
            $_templateDecode,
            'unserialize()',
            $_area,
            $_serializeEnd - $_serializeStart
        );

        /**
         * JSON
         */
        $_data = json_encode($_source);

        $_jsonStart = microtime(TRUE);

        for ($_counter = 0; $_counter < $_maxLoop; $_counter++)
        {
            // The second argument makes json_decode() return associative
            // arrays, so objects in the source come back as arrays here.
            json_decode($_data, TRUE);
        }

        $_jsonEnd = microtime(TRUE);

        $_output .= sprintf(
            $_templateDecode,
            'json_decode()',
            $_area,
            $_jsonEnd - $_jsonStart
        );

        /**
         * igbinary
         */
        $_data = igbinary_serialize($_source);

        $_igbinaryStart = microtime(TRUE);

        for ($_counter = 0; $_counter < $_maxLoop; $_counter++)
        {
            igbinary_unserialize($_data);
        }

        $_igbinaryEnd = microtime(TRUE);

        $_output .= sprintf(
            $_templateDecode,
            'igbinary_unserialize()',
            $_area,
            $_igbinaryEnd - $_igbinaryStart
        );

        // Separator between data sets.
        $_output .= str_repeat('=', 20) . "\r\n";
    }

    // Print the whole report as preformatted text so the line breaks survive
    // when viewed in a browser.
    echo '<pre>' . $_output . '</pre>';
    

    Serializing results

    When serializing data we are always concerned about the size of the result but also about the time it took for the data to be serialized.

    As far as size is concerned, json_encode seems to be producing the smallest result in bytes for most of the tests.

    Size comparison

    Strings

    serialize() [strings]: Size: 105 bytes, 1.8710339069366 time to encode
    json_encode() [strings]: Size: 67 bytes, 1.5691390037537 time to encode
    igbinary_serialize() [strings]: Size: 64 bytes, 3.2276048660278 time to encode <==
    

    Integers

    serialize() [integers]: Size: 121 bytes, 3.0198090076447 time to encode
    json_encode() [integers]: Size: 34 bytes, 1.2248229980469 time to encode <==
    igbinary_serialize() [integers]: Size: 58 bytes, 2.2877519130707 time to encode
    

    Booleans

    serialize() [booleans]: Size: 62 bytes, 2.0834550857544 time to encode
    json_encode() [booleans]: Size: 39 bytes, 1.0889070034027 time to encode
    igbinary_serialize() [booleans]: Size: 27 bytes, 1.8252439498901 time to encode <==
    

    Floats

    serialize() [floats]: Size: 709 bytes, 27.496570825577 time to encode
    json_encode() [floats]: Size: 77 bytes, 5.0476500988007 time to encode <==
    igbinary_serialize() [floats]: Size: 142 bytes, 2.4856028556824 time to encode
    

    Mixed

    serialize() [mixed]: Size: 178 bytes, 6.301619052887 time to encode
    json_encode() [mixed]: Size: 54 bytes, 2.0463008880615 time to encode
    igbinary_serialize() [mixed]: Size: 50 bytes, 2.3894169330597 time to encode <==
    

    Objects

    serialize() [objects]: Size: 326 bytes, 4.8698291778564 time to encode
    json_encode() [objects]: Size: 148 bytes, 2.4744520187378 time to encode <==
    igbinary_serialize() [objects]: Size: 177 bytes, 6.472992181778 time to encode
    

    All data types

    serialize() [all]: Size: 1567 bytes, 42.437592029572 time to encode
    json_encode() [all]: Size: 462 bytes, 9.9569129943848 time to encode <==
    igbinary_serialize() [all]: Size: 478 bytes, 18.053789138794 time to encode
    
    Speed comparison

    Analyzing the time it took for each test to complete, we see again that json_encode is the clear winner (the shortest time in each test is marked with <==).

    Strings

    serialize() [strings]: Size: 105 bytes, 1.8710339069366 time to encode
    json_encode() [strings]: Size: 67 bytes, 1.5691390037537 time to encode <==
    igbinary_serialize() [strings]: Size: 64 bytes, 3.2276048660278 time to encode
    

    Integers

    serialize() [integers]: Size: 121 bytes, 3.0198090076447 time to encode
    json_encode() [integers]: Size: 34 bytes, 1.2248229980469 time to encode <==
    igbinary_serialize() [integers]: Size: 58 bytes, 2.2877519130707 time to encode
    

    Booleans

    serialize() [booleans]: Size: 62 bytes, 2.0834550857544 time to encode
    json_encode() [booleans]: Size: 39 bytes, 1.0889070034027 time to encode <==
    igbinary_serialize() [booleans]: Size: 27 bytes, 1.8252439498901 time to encode
    

    Floats

    serialize() [floats]: Size: 709 bytes, 27.496570825577 time to encode
    json_encode() [floats]: Size: 77 bytes, 5.0476500988007 time to encode
    igbinary_serialize() [floats]: Size: 142 bytes, 2.4856028556824 time to encode <==
    

    Mixed

    serialize() [mixed]: Size: 178 bytes, 6.301619052887 time to encode
    json_encode() [mixed]: Size: 54 bytes, 2.0463008880615 time to encode <==
    igbinary_serialize() [mixed]: Size: 50 bytes, 2.3894169330597 time to encode
    

    Objects

    serialize() [objects]: Size: 326 bytes, 4.8698291778564 time to encode
    json_encode() [objects]: Size: 148 bytes, 2.4744520187378 time to encode <==
    igbinary_serialize() [objects]: Size: 177 bytes, 6.472992181778 time to encode
    

    All data types

    serialize() [all]: Size: 1567 bytes, 42.437592029572 time to encode
    json_encode() [all]: Size: 462 bytes, 9.9569129943848 time to encode <==
    igbinary_serialize() [all]: Size: 478 bytes, 18.053789138794 time to encode
    

    Combination

    Having the smallest result in size might not always be the best metric on which to base the choice of serialization algorithm. For instance, looking at the results above for the Strings test, igbinary does indeed produce the smallest result (64 bytes), but it takes twice as long to serialize the data compared to json_encode (3.23 vs. 1.57 seconds), and the size difference is a mere 3 bytes (64 vs. 67).

    Similarly, for the Booleans test, igbinary produces 27 bytes and json_encode 39 bytes. It does, however, take igbinary nearly 70% more time to produce the result compared to json_encode (1.83 vs. 1.09 seconds).

    For the Floats test the situation is reversed. json_encode produces a result that is around 50% smaller than the one of igbinary but it takes twice as much time to produce it.

    As far as serializing data, in my personal opinion, json_encode is the clear winner.

    Unserializing Results

    Unserializing data is equally - and at times more - important than serializing. In many applications, developers sacrifice performance in writing but don't compromise when reading data.

    In the tests below one can easily see that igbinary is the clear winner. At times the unserialize function is very close to (or even outperforms) igbinary, but overall igbinary is the function that unserializes data the fastest.

    Speed comparison

    Strings

    unserialize() [strings]: 1.8259189128876 time to decode <==
    json_decode() [strings]: 2.6482670307159 time to decode
    igbinary_unserialize() [strings]: 1.8359968662262 time to decode
    

    Integers

    unserialize() [integers]: 2.3886890411377 time to decode <==
    json_decode() [integers]: 2.8659090995789 time to decode
    igbinary_unserialize() [integers]: 2.4441809654236 time to decode
    

    Booleans

    unserialize() [booleans]: 1.8097970485687 time to decode
    json_decode() [booleans]: 2.4416139125824 time to decode
    igbinary_unserialize() [booleans]: 1.7585029602051 time to decode <==
    

    Floats

    unserialize() [floats]: 18.512004137039 time to decode
    json_decode() [floats]: 3.7896130084991 time to decode
    igbinary_unserialize() [floats]: 2.6730649471283 time to decode <==
    

    Mixed

    unserialize() [mixed]: 4.6794769763947 time to decode
    json_decode() [mixed]: 2.7775249481201 time to decode
    igbinary_unserialize() [mixed]: 1.9598047733307 time to decode <==
    

    Objects

    unserialize() [objects]: 5.5468521118164 time to decode
    json_decode() [objects]: 5.7660481929779 time to decode
    igbinary_unserialize() [objects]: 5.2672090530396 time to decode <==
    

    All data types

    unserialize() [all]: 31.01339006424 time to decode
    json_decode() [all]: 14.574991941452 time to decode
    igbinary_unserialize() [all]: 10.734386920929 time to decode <==
    

    Conclusion

    If your application is mostly focused on reads rather than writes, igbinary is the clear winner, since it will unserialize your data faster than the other two functions. If however you are more focused on storing data, json_encode is the clear choice.

    Updates

    2013-03-07: memcached was not used with igbinary. PHP version for tests was 5.3.1 on a Linux Mint machine with 6GB RAM.

    2013-06-14: Reader Dennis has been kind enough to run the same script on his server and share the results with me. He ran the script on an i7-3930K, 64GB, Debian Squeeze with the latest version of PHP (5.4.16) and igbinary.

    Serialize

    Strings

    serialize() [strings]: Size: 105 bytes, 0.63280701637268 time to encode <== Time
    json_encode() [strings]: Size: 67 bytes, 0.78271317481995 time to encode
    igbinary_serialize() [strings]: Size: 64 bytes, 0.97228002548218 time to encode <== Size
    

    Integers

    serialize() [integers]: Size: 121 bytes, 1.3659980297089 time to encode
    json_encode() [integers]: Size: 34 bytes, 0.46304202079773 time to encode <== Time/Size
    igbinary_serialize() [integers]: Size: 58 bytes, 0.65074491500854 time to encode
    

    Booleans

    serialize() [booleans]: Size: 62 bytes, 0.80747985839844 time to encode
    json_encode() [booleans]: Size: 39 bytes, 0.27534413337708 time to encode <== Time
    igbinary_serialize() [booleans]: Size: 27 bytes, 0.52206611633301 time to encode <== Size
    

    Floats

    serialize() [floats]: Size: 307 bytes, 6.3345258235931 time to encode
    json_encode() [floats]: Size: 77 bytes, 3.3697159290314 time to encode <== Time/Size
    igbinary_serialize() [floats]: Size: 142 bytes, 0.70451712608337 time to encode
    

    Mixed

    serialize() [mixed]: Size: 105 bytes, 1.4573359489441 time to encode
    json_encode() [mixed]: Size: 54 bytes, 0.98674011230469 time to encode
    igbinary_serialize() [mixed]: Size: 50 bytes, 0.71359205245972 time to encode <== Time/Size
    

    Objects

    serialize() [objects]: Size: 326 bytes, 2.4085388183594 time to encode
    json_encode() [objects]: Size: 148 bytes, 1.6553950309753 time to encode <== Time/Size
    igbinary_serialize() [objects]: Size: 177 bytes, 2.1983618736267 time to encode
    

    All

    serialize() [all]: Size: 1092 bytes, 13.614814043045 time to encode
    json_encode() [all]: Size: 462 bytes, 7.7341570854187 time to encode <== Size
    igbinary_serialize() [all]: Size: 478 bytes, 5.6470530033112 time to encode <== Time
    
    Unserialize

    Strings

    unserialize() [strings]: 0.69071316719055 time to decode
    json_decode() [strings]: 1.381010055542 time to decode
    igbinary_unserialize() [strings]: 0.52063202857971 time to decode <==
    

    Integers

    unserialize() [integers]: 1.0607678890228 time to decode
    json_decode() [integers]: 1.4053201675415 time to decode
    igbinary_unserialize() [integers]: 0.70937013626099 time to decode <==
    

    Booleans

    unserialize() [booleans]: 0.65101194381714 time to decode
    json_decode() [booleans]: 1.0951101779938 time to decode
    igbinary_unserialize() [booleans]: 0.49839997291565 time to decode <==
    

    Floats

    unserialize() [floats]: 5.3973641395569 time to decode
    json_decode() [floats]: 2.0127139091492 time to decode
    igbinary_unserialize() [floats]: 0.75269412994385 time to decode <==
    

    Mixed

    unserialize() [mixed]: 1.5048658847809 time to decode
    json_decode() [mixed]: 1.2782678604126 time to decode
    igbinary_unserialize() [mixed]: 0.55352306365967 time to decode <==
    

    Objects

    unserialize() [objects]: 2.6635551452637 time to decode
    json_decode() [objects]: 3.3167290687561 time to decode
    igbinary_unserialize() [objects]: 1.9917018413544 time to decode <==
    

    All

    unserialize() [all]: 11.949328899384 time to decode
    json_decode() [all]: 9.9836950302124 time to decode
    igbinary_unserialize() [all]: 4.4029591083527 time to decode <==
    

  • Advertisement