You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@trafficserver.apache.org by James Peach <ja...@me.com> on 2012/12/17 20:43:40 UTC

struct member packing

Hi all,

I came across pahole <http://linux.die.net/man/1/pahole>, and it's able to show some pretty interesting info about ATS internal structures. It looks like there are probably some opportunities to pack structures better, and probably improve cache utilization.

For example, HttpSM could be 54 bytes smaller:

~/trafficserver.git$ pahole -C HttpSM ./proxy/.libs/traffic_server 
die__process_function: tag not supported (template_type_parameter)!
die__process_class: tag not supported (template_value_parameter)!
struct HttpSM : public Continuation {
	struct History {
		const char  *              fileline;             /*     0     8 */
		short unsigned int         event;                /*     8     2 */
		short int                  reentrancy;           /*    10     2 */

		/* size: 16, cachelines: 1, members: 3 */
		/* last cacheline: 16 bytes */

		/* BRAIN FART ALERT! 16 != 12 + 0(holes), diff = 4 */

	};

	class Link_debug_link : public Link<HttpSM> {
	public:

		/* struct Link<HttpSM>        <ancestor>; */     /*     0    16 */
		class HttpSM * & next_link(class HttpSM *); /* linkage=_ZN6HttpSM15Link_debug_link9next_linkEPS_ */

		class HttpSM * & prev_link(class HttpSM *); /* linkage=_ZN6HttpSM15Link_debug_link9prev_linkEPS_ */

		const class HttpSM  * next_link(const class HttpSM  *); /* linkage=_ZN6HttpSM15Link_debug_link9next_linkEPKS_ */

		const class HttpSM  * prev_link(const class HttpSM  *); /* linkage=_ZN6HttpSM15Link_debug_link9prev_linkEPKS_ */


		/* size: 16, cachelines: 1, members: 1 */
		/* last cacheline: 16 bytes */
	};

public:

	/* struct Continuation        <ancestor>; */     /*     0    48 */
	int64_t                    sm_id;                /*    48     8 */
	unsigned int               magic;                /*    56     4 */
	bool                       enable_redirection;   /*    60     1 */
	bool                       api_enable_redirection; /*    61     1 */

	/* XXX 2 bytes hole, try to pack */

	/* --- cacheline 1 boundary (64 bytes) --- */
	char *                     redirect_url;         /*    64     8 */
	int                        redirect_url_len;     /*    72     4 */
	int                        redirection_tries;    /*    76     4 */
	int64_t                    transfered_bytes;     /*    80     8 */
	bool                       post_failed;          /*    88     1 */
	bool                       debug_on;             /*    89     1 */

	/* XXX 2 bytes hole, try to pack */

	enum HttpPluginTunnel_t    plugin_tunnel_type;   /*    92     4 */
	class PluginVCCore *       plugin_tunnel;        /*    96     8 */
	struct State               t_state;              /*   104  5920 */
protected:

	/* --- cacheline 94 boundary (6016 bytes) was 8 bytes ago --- */
	int                        reentrancy_count;     /*  6024     4 */

	/* XXX 4 bytes hole, try to pack */

	struct History             history[64];          /*  6032  1024 */
	/* --- cacheline 110 boundary (7040 bytes) was 16 bytes ago --- */
	int                        history_pos;          /*  7056     4 */

	/* XXX 4 bytes hole, try to pack */

	class HttpTunnel          tunnel;                /*  7064  1072 */
	/* --- cacheline 127 boundary (8128 bytes) was 8 bytes ago --- */
	struct HttpVCTable         vc_table;             /*  8136   288 */
	/* --- cacheline 131 boundary (8384 bytes) was 40 bytes ago --- */
	class HttpVCTableEntry *   ua_entry;             /*  8424     8 */
	class HttpClientSession *  ua_session;           /*  8432     8 */
	enum BackgroundFill_t      background_fill;      /*  8440     4 */

	/* XXX 4 bytes hole, try to pack */

	/* --- cacheline 132 boundary (8448 bytes) --- */
	class IOBufferReader *     ua_buffer_reader;     /*  8448     8 */
	class HttpVCTableEntry *   server_entry;         /*  8456     8 */
	class HttpServerSession *  server_session;       /*  8464     8 */
	int                        shared_session_retries; /*  8472     4 */

	/* XXX 4 bytes hole, try to pack */

	class IOBufferReader *     server_buffer_reader; /*  8480     8 */
	struct HttpTransformInfo   transform_info;       /*  8488    16 */
	struct HttpTransformInfo   post_transform_info;  /*  8504    16 */
	/* --- cacheline 133 boundary (8512 bytes) was 8 bytes ago --- */
	class HttpCacheSM         cache_sm;              /*  8520   184 */
	/* --- cacheline 136 boundary (8704 bytes) --- */
	class HttpCacheSM         transform_cache_sm;    /*  8704   184 */
	/* --- cacheline 138 boundary (8832 bytes) was 56 bytes ago --- */
	class HttpCacheSM *        second_cache_sm;      /*  8888     8 */
	/* --- cacheline 139 boundary (8896 bytes) --- */
	HttpSMHandler              default_handler;      /*  8896    16 */
	class Action *             pending_action;       /*  8912     8 */
	class Action *             historical_action;    /*  8920     8 */
	class Continuation *       schedule_cont;        /*  8928     8 */
	struct HTTPParser          http_parser;          /*  8936    48 */
	/* --- cacheline 140 boundary (8960 bytes) was 24 bytes ago --- */
	enum StateMachineAction_t  last_action;          /*  8984     4 */

	/* XXX 4 bytes hole, try to pack */

	struct {
		int                (*__pfn)(class HttpSM *, int, void *); /*  8992     8 */
		long int           __delta;              /*  9000     8 */
	} m_last_state;                                  /*  8992    16 */
	int                        client_request_hdr_bytes; /*  9008     4 */

	/* XXX 4 bytes hole, try to pack */

	int64_t                    client_request_body_bytes; /*  9016     8 */
	/* --- cacheline 141 boundary (9024 bytes) --- */
	int                        server_request_hdr_bytes; /*  9024     4 */

	/* XXX 4 bytes hole, try to pack */

	int64_t                    server_request_body_bytes; /*  9032     8 */
	int                        server_response_hdr_bytes; /*  9040     4 */

	/* XXX 4 bytes hole, try to pack */

	int64_t                    server_response_body_bytes; /*  9048     8 */
	int                        client_response_hdr_bytes; /*  9056     4 */

	/* XXX 4 bytes hole, try to pack */

	int64_t                    client_response_body_bytes; /*  9064     8 */
	int                        cache_response_hdr_bytes; /*  9072     4 */

	/* XXX 4 bytes hole, try to pack */

	int64_t                    cache_response_body_bytes; /*  9080     8 */
	/* --- cacheline 142 boundary (9088 bytes) --- */
	int                        pushed_response_hdr_bytes; /*  9088     4 */

	/* XXX 4 bytes hole, try to pack */

	int64_t                    pushed_response_body_bytes; /*  9096     8 */
	class TransactionMilestones milestones;          /*  9104   152 */
	/* --- cacheline 144 boundary (9216 bytes) was 40 bytes ago --- */
	int                        hooks_set;            /*  9256     4 */
	TSHttpHookID               cur_hook_id;          /*  9260     4 */
	class APIHook *            cur_hook;             /*  9264     8 */
	int64_t                    prev_hook_start_time; /*  9272     8 */
	/* --- cacheline 145 boundary (9280 bytes) --- */
	int                        cur_hooks;            /*  9280     4 */
	enum HttpApiState_t        callout_state;        /*  9284     4 */
	class HttpAPIHooks        api_hooks;             /*  9288   264 */
	/* --- cacheline 149 boundary (9536 bytes) was 16 bytes ago --- */
	bool                       terminate_sm;         /*  9552     1 */
	bool                       kill_this_async_done; /*  9553     1 */

	/* XXX 6 bytes hole, try to pack */

	struct Link<HttpSM>        debug_link;           /*  9560    16 */
        ...

	/* vtable has 4 entries: {
	   [2] = destroy(_ZN6HttpSM7destroyEv), 
	   [3] = handle_api_return(_ZN6HttpSM17handle_api_returnEv), 
	   [4] = set_next_state(_ZN6HttpSM14set_next_stateEv), 
	   [5] = kill_this_async_hook(_ZN6HttpSM20kill_this_async_hookEiPv), 
	} */
	/* size: 9576, cachelines: 150, members: 62 */
	/* sum members: 9522, holes: 14, sum holes: 54 */
	/* last cacheline: 40 bytes */
};


Re: struct member packing

Posted by Yunkai Zhang <yu...@gmail.com>.
Good job!


On Tue, Dec 18, 2012 at 3:43 AM, James Peach <ja...@me.com> wrote:

> Hi all,
>
> I came across pahole <http://linux.die.net/man/1/pahole>, and it's able
> to show some pretty interesting info about ATS internal structures. It
> looks like there's probably some opportunities to pack structure better,
> and probably improve cache utilization.
>
> For example, HttpSM could be 54 bytes smaller:
>
> ~/trafficserver.git$ pahole -C HttpSM ./proxy/.libs/traffic_server
> die__process_function: tag not supported (template_type_parameter)!
> die__process_class: tag not supported (template_value_parameter)!
> struct HttpSM : public Continuation {
>         struct History {
>                 const char  *              fileline;             /*     0
>     8 */
>                 short unsigned int         event;                /*     8
>     2 */
>                 short int                  reentrancy;           /*    10
>     2 */
>
>                 /* size: 16, cachelines: 1, members: 3 */
>                 /* last cacheline: 16 bytes */
>
>                 /* BRAIN FART ALERT! 16 != 12 + 0(holes), diff = 4 */
>
>         };
>
>         class Link_debug_link : public Link<HttpSM> {
>         public:
>
>                 /* struct Link<HttpSM>        <ancestor>; */     /*     0
>    16 */
>                 class HttpSM * & next_link(class HttpSM *); /*
> linkage=_ZN6HttpSM15Link_debug_link9next_linkEPS_ */
>
>                 class HttpSM * & prev_link(class HttpSM *); /*
> linkage=_ZN6HttpSM15Link_debug_link9prev_linkEPS_ */
>
>                 const class HttpSM  * next_link(const class HttpSM  *); /*
> linkage=_ZN6HttpSM15Link_debug_link9next_linkEPKS_ */
>
>                 const class HttpSM  * prev_link(const class HttpSM  *); /*
> linkage=_ZN6HttpSM15Link_debug_link9prev_linkEPKS_ */
>
>
>                 /* size: 16, cachelines: 1, members: 1 */
>                 /* last cacheline: 16 bytes */
>         };
>
> public:
>
>         /* struct Continuation        <ancestor>; */     /*     0    48 */
>         int64_t                    sm_id;                /*    48     8 */
>         unsigned int               magic;                /*    56     4 */
>         bool                       enable_redirection;   /*    60     1 */
>         bool                       api_enable_redirection; /*    61     1
> */
>
>         /* XXX 2 bytes hole, try to pack */
>
>         /* --- cacheline 1 boundary (64 bytes) --- */
>         char *                     redirect_url;         /*    64     8 */
>         int                        redirect_url_len;     /*    72     4 */
>         int                        redirection_tries;    /*    76     4 */
>         int64_t                    transfered_bytes;     /*    80     8 */
>         bool                       post_failed;          /*    88     1 */
>         bool                       debug_on;             /*    89     1 */
>
>         /* XXX 2 bytes hole, try to pack */
>
>         enum HttpPluginTunnel_t    plugin_tunnel_type;   /*    92     4 */
>         class PluginVCCore *       plugin_tunnel;        /*    96     8 */
>         struct State               t_state;              /*   104  5920 */
> protected:
>
>         /* --- cacheline 94 boundary (6016 bytes) was 8 bytes ago --- */
>         int                        reentrancy_count;     /*  6024     4 */
>
>         /* XXX 4 bytes hole, try to pack */
>
>         struct History             history[64];          /*  6032  1024 */
>         /* --- cacheline 110 boundary (7040 bytes) was 16 bytes ago --- */
>         int                        history_pos;          /*  7056     4 */
>
>         /* XXX 4 bytes hole, try to pack */
>
>         class HttpTunnel          tunnel;                /*  7064  1072 */
>         /* --- cacheline 127 boundary (8128 bytes) was 8 bytes ago --- */
>         struct HttpVCTable         vc_table;             /*  8136   288 */
>         /* --- cacheline 131 boundary (8384 bytes) was 40 bytes ago --- */
>         class HttpVCTableEntry *   ua_entry;             /*  8424     8 */
>         class HttpClientSession *  ua_session;           /*  8432     8 */
>         enum BackgroundFill_t      background_fill;      /*  8440     4 */
>
>         /* XXX 4 bytes hole, try to pack */
>
>         /* --- cacheline 132 boundary (8448 bytes) --- */
>         class IOBufferReader *     ua_buffer_reader;     /*  8448     8 */
>         class HttpVCTableEntry *   server_entry;         /*  8456     8 */
>         class HttpServerSession *  server_session;       /*  8464     8 */
>         int                        shared_session_retries; /*  8472     4
> */
>
>         /* XXX 4 bytes hole, try to pack */
>
>         class IOBufferReader *     server_buffer_reader; /*  8480     8 */
>         struct HttpTransformInfo   transform_info;       /*  8488    16 */
>         struct HttpTransformInfo   post_transform_info;  /*  8504    16 */
>         /* --- cacheline 133 boundary (8512 bytes) was 8 bytes ago --- */
>         class HttpCacheSM         cache_sm;              /*  8520   184 */
>         /* --- cacheline 136 boundary (8704 bytes) --- */
>         class HttpCacheSM         transform_cache_sm;    /*  8704   184 */
>         /* --- cacheline 138 boundary (8832 bytes) was 56 bytes ago --- */
>         class HttpCacheSM *        second_cache_sm;      /*  8888     8 */
>         /* --- cacheline 139 boundary (8896 bytes) --- */
>         HttpSMHandler              default_handler;      /*  8896    16 */
>         class Action *             pending_action;       /*  8912     8 */
>         class Action *             historical_action;    /*  8920     8 */
>         class Continuation *       schedule_cont;        /*  8928     8 */
>         struct HTTPParser          http_parser;          /*  8936    48 */
>         /* --- cacheline 140 boundary (8960 bytes) was 24 bytes ago --- */
>         enum StateMachineAction_t  last_action;          /*  8984     4 */
>
>         /* XXX 4 bytes hole, try to pack */
>
>         struct {
>                 int                (*__pfn)(class HttpSM *, int, void *);
> /*  8992     8 */
>                 long int           __delta;              /*  9000     8 */
>         } m_last_state;                                  /*  8992    16 */
>         int                        client_request_hdr_bytes; /*  9008
> 4 */
>
>         /* XXX 4 bytes hole, try to pack */
>
>         int64_t                    client_request_body_bytes; /*  9016
> 8 */
>         /* --- cacheline 141 boundary (9024 bytes) --- */
>         int                        server_request_hdr_bytes; /*  9024
> 4 */
>
>         /* XXX 4 bytes hole, try to pack */
>
>         int64_t                    server_request_body_bytes; /*  9032
> 8 */
>         int                        server_response_hdr_bytes; /*  9040
> 4 */
>
>         /* XXX 4 bytes hole, try to pack */
>
>         int64_t                    server_response_body_bytes; /*  9048
>   8 */
>         int                        client_response_hdr_bytes; /*  9056
> 4 */
>
>         /* XXX 4 bytes hole, try to pack */
>
>         int64_t                    client_response_body_bytes; /*  9064
>   8 */
>         int                        cache_response_hdr_bytes; /*  9072
> 4 */
>
>         /* XXX 4 bytes hole, try to pack */
>
>         int64_t                    cache_response_body_bytes; /*  9080
> 8 */
>         /* --- cacheline 142 boundary (9088 bytes) --- */
>         int                        pushed_response_hdr_bytes; /*  9088
> 4 */
>
>         /* XXX 4 bytes hole, try to pack */
>
>         int64_t                    pushed_response_body_bytes; /*  9096
>   8 */
>         class TransactionMilestones milestones;          /*  9104   152 */
>         /* --- cacheline 144 boundary (9216 bytes) was 40 bytes ago --- */
>         int                        hooks_set;            /*  9256     4 */
>         TSHttpHookID               cur_hook_id;          /*  9260     4 */
>         class APIHook *            cur_hook;             /*  9264     8 */
>         int64_t                    prev_hook_start_time; /*  9272     8 */
>         /* --- cacheline 145 boundary (9280 bytes) --- */
>         int                        cur_hooks;            /*  9280     4 */
>         enum HttpApiState_t        callout_state;        /*  9284     4 */
>         class HttpAPIHooks        api_hooks;             /*  9288   264 */
>         /* --- cacheline 149 boundary (9536 bytes) was 16 bytes ago --- */
>         bool                       terminate_sm;         /*  9552     1 */
>         bool                       kill_this_async_done; /*  9553     1 */
>
>         /* XXX 6 bytes hole, try to pack */
>
>         struct Link<HttpSM>        debug_link;           /*  9560    16 */
>         ...
>
>         /* vtable has 4 entries: {
>            [2] = destroy(_ZN6HttpSM7destroyEv),
>            [3] = handle_api_return(_ZN6HttpSM17handle_api_returnEv),
>            [4] = set_next_state(_ZN6HttpSM14set_next_stateEv),
>            [5] =
> kill_this_async_hook(_ZN6HttpSM20kill_this_async_hookEiPv),
>         } */
>         /* size: 9576, cachelines: 150, members: 62 */
>         /* sum members: 9522, holes: 14, sum holes: 54 */
>         /* last cacheline: 40 bytes */
> };
>
>


-- 
Yunkai Zhang
Work at Taobao

Re: struct member packing

Posted by Igor Galić <i....@brainsware.org>.
> > How is that going to fare with ABI compatibility?
> > How does this compare between 32bit and 64bit?
> > How does it compare between different platforms?
> 
> This all depends on the changes we make (if any). pahole is showing
> you the actual structure layout by examining the debug information.
> There are only a couple of places where we expose structs to plugins,
> so I don't believe that there's much ABI at risk. As for different
> platforms, you would need to build on the platform that you care
> about and examine the debug info on that platform. I expect that the
> layout would not be significantly different.

ACK.

> It would be interesting to dump some memory stats from a busy site
> and figure out which structures are hot.

+1

-- 
Igor Galić

Tel: +43 (0) 664 886 22 883
Mail: i.galic@brainsware.org
URL: http://brainsware.org/
GPG: 6880 4155 74BD FD7C B515  2EA5 4B1D 9E08 A097 C9AE


Re: struct member packing

Posted by James Peach <ja...@me.com>.
On Dec 18, 2012, at 2:29 AM, Igor Galić <i....@brainsware.org> wrote:

> 
> 
> ----- Original Message -----
>> Hi all,
>> 
>> I came across pahole <http://linux.die.net/man/1/pahole>, and it's
>> able to show some pretty interesting info about ATS internal
>> structures. It looks like there's probably some opportunities to
>> pack structure better, and probably improve cache utilization.
>> 
>> For example, HttpSM could be 54 bytes smaller:
> 
> Awesome! And, as always, I'm here to ask the uncomfortable questions
> so no one else has to:
> 
> How is that going to fare with ABI compatibility?
> How does this compare between 32bit and 64bit?
> How does it compare between different platforms?

This all depends on the changes we make (if any). pahole is showing you the actual structure layout by examining the debug information. There are only a couple of places where we expose structs to plugins, so I don't believe that there's much ABI at risk. As for different platforms, you would need to build on the platform that you care about and examine the debug info on that platform. I expect that the layout would not be significantly different.

It would be interesting to dump some memory stats from a busy site and figure out which structures are hot.

> 
>> ~/trafficserver.git$ pahole -C HttpSM ./proxy/.libs/traffic_server
>> die__process_function: tag not supported (template_type_parameter)!
>> die__process_class: tag not supported (template_value_parameter)!
>> struct HttpSM : public Continuation {
>> 	struct History {
>> 		const char  *              fileline;             /*     0     8 */
>> 		short unsigned int         event;                /*     8     2 */
>> 		short int                  reentrancy;           /*    10     2 */
>> 
>> 		/* size: 16, cachelines: 1, members: 3 */
>> 		/* last cacheline: 16 bytes */
>> 
>> 		/* BRAIN FART ALERT! 16 != 12 + 0(holes), diff = 4 */
>> 
>> 	};
>> 
>> 	class Link_debug_link : public Link<HttpSM> {
>> 	public:
>> 
>> 		/* struct Link<HttpSM>        <ancestor>; */     /*     0    16 */
>> 		class HttpSM * & next_link(class HttpSM *); /*
>> 		linkage=_ZN6HttpSM15Link_debug_link9next_linkEPS_ */
>> 
>> 		class HttpSM * & prev_link(class HttpSM *); /*
>> 		linkage=_ZN6HttpSM15Link_debug_link9prev_linkEPS_ */
>> 
>> 		const class HttpSM  * next_link(const class HttpSM  *); /*
>> 		linkage=_ZN6HttpSM15Link_debug_link9next_linkEPKS_ */
>> 
>> 		const class HttpSM  * prev_link(const class HttpSM  *); /*
>> 		linkage=_ZN6HttpSM15Link_debug_link9prev_linkEPKS_ */
>> 
>> 
>> 		/* size: 16, cachelines: 1, members: 1 */
>> 		/* last cacheline: 16 bytes */
>> 	};
>> 
>> public:
>> 
>> 	/* struct Continuation        <ancestor>; */     /*     0    48 */
>> 	int64_t                    sm_id;                /*    48     8 */
>> 	unsigned int               magic;                /*    56     4 */
>> 	bool                       enable_redirection;   /*    60     1 */
>> 	bool                       api_enable_redirection; /*    61     1 */
>> 
>> 	/* XXX 2 bytes hole, try to pack */
>> 
>> 	/* --- cacheline 1 boundary (64 bytes) --- */
>> 	char *                     redirect_url;         /*    64     8 */
>> 	int                        redirect_url_len;     /*    72     4 */
>> 	int                        redirection_tries;    /*    76     4 */
>> 	int64_t                    transfered_bytes;     /*    80     8 */
>> 	bool                       post_failed;          /*    88     1 */
>> 	bool                       debug_on;             /*    89     1 */
>> 
>> 	/* XXX 2 bytes hole, try to pack */
>> 
>> 	enum HttpPluginTunnel_t    plugin_tunnel_type;   /*    92     4 */
>> 	class PluginVCCore *       plugin_tunnel;        /*    96     8 */
>> 	struct State               t_state;              /*   104  5920 */
>> protected:
>> 
>> 	/* --- cacheline 94 boundary (6016 bytes) was 8 bytes ago --- */
>> 	int                        reentrancy_count;     /*  6024     4 */
>> 
>> 	/* XXX 4 bytes hole, try to pack */
>> 
>> 	struct History             history[64];          /*  6032  1024 */
>> 	/* --- cacheline 110 boundary (7040 bytes) was 16 bytes ago --- */
>> 	int                        history_pos;          /*  7056     4 */
>> 
>> 	/* XXX 4 bytes hole, try to pack */
>> 
>> 	class HttpTunnel          tunnel;                /*  7064  1072 */
>> 	/* --- cacheline 127 boundary (8128 bytes) was 8 bytes ago --- */
>> 	struct HttpVCTable         vc_table;             /*  8136   288 */
>> 	/* --- cacheline 131 boundary (8384 bytes) was 40 bytes ago --- */
>> 	class HttpVCTableEntry *   ua_entry;             /*  8424     8 */
>> 	class HttpClientSession *  ua_session;           /*  8432     8 */
>> 	enum BackgroundFill_t      background_fill;      /*  8440     4 */
>> 
>> 	/* XXX 4 bytes hole, try to pack */
>> 
>> 	/* --- cacheline 132 boundary (8448 bytes) --- */
>> 	class IOBufferReader *     ua_buffer_reader;     /*  8448     8 */
>> 	class HttpVCTableEntry *   server_entry;         /*  8456     8 */
>> 	class HttpServerSession *  server_session;       /*  8464     8 */
>> 	int                        shared_session_retries; /*  8472     4 */
>> 
>> 	/* XXX 4 bytes hole, try to pack */
>> 
>> 	class IOBufferReader *     server_buffer_reader; /*  8480     8 */
>> 	struct HttpTransformInfo   transform_info;       /*  8488    16 */
>> 	struct HttpTransformInfo   post_transform_info;  /*  8504    16 */
>> 	/* --- cacheline 133 boundary (8512 bytes) was 8 bytes ago --- */
>> 	class HttpCacheSM         cache_sm;              /*  8520   184 */
>> 	/* --- cacheline 136 boundary (8704 bytes) --- */
>> 	class HttpCacheSM         transform_cache_sm;    /*  8704   184 */
>> 	/* --- cacheline 138 boundary (8832 bytes) was 56 bytes ago --- */
>> 	class HttpCacheSM *        second_cache_sm;      /*  8888     8 */
>> 	/* --- cacheline 139 boundary (8896 bytes) --- */
>> 	HttpSMHandler              default_handler;      /*  8896    16 */
>> 	class Action *             pending_action;       /*  8912     8 */
>> 	class Action *             historical_action;    /*  8920     8 */
>> 	class Continuation *       schedule_cont;        /*  8928     8 */
>> 	struct HTTPParser          http_parser;          /*  8936    48 */
>> 	/* --- cacheline 140 boundary (8960 bytes) was 24 bytes ago --- */
>> 	enum StateMachineAction_t  last_action;          /*  8984     4 */
>> 
>> 	/* XXX 4 bytes hole, try to pack */
>> 
>> 	struct {
>> 		int                (*__pfn)(class HttpSM *, int, void *); /*  8992
>> 		8 */
>> 		long int           __delta;              /*  9000     8 */
>> 	} m_last_state;                                  /*  8992    16 */
>> 	int                        client_request_hdr_bytes; /*  9008     4
>> 	*/
>> 
>> 	/* XXX 4 bytes hole, try to pack */
>> 
>> 	int64_t                    client_request_body_bytes; /*  9016     8
>> 	*/
>> 	/* --- cacheline 141 boundary (9024 bytes) --- */
>> 	int                        server_request_hdr_bytes; /*  9024     4
>> 	*/
>> 
>> 	/* XXX 4 bytes hole, try to pack */
>> 
>> 	int64_t                    server_request_body_bytes; /*  9032     8
>> 	*/
>> 	int                        server_response_hdr_bytes; /*  9040     4
>> 	*/
>> 
>> 	/* XXX 4 bytes hole, try to pack */
>> 
>> 	int64_t                    server_response_body_bytes; /*  9048
>> 	8 */
>> 	int                        client_response_hdr_bytes; /*  9056     4
>> 	*/
>> 
>> 	/* XXX 4 bytes hole, try to pack */
>> 
>> 	int64_t                    client_response_body_bytes; /*  9064
>> 	8 */
>> 	int                        cache_response_hdr_bytes; /*  9072     4
>> 	*/
>> 
>> 	/* XXX 4 bytes hole, try to pack */
>> 
>> 	int64_t                    cache_response_body_bytes; /*  9080     8
>> 	*/
>> 	/* --- cacheline 142 boundary (9088 bytes) --- */
>> 	int                        pushed_response_hdr_bytes; /*  9088     4
>> 	*/
>> 
>> 	/* XXX 4 bytes hole, try to pack */
>> 
>> 	int64_t                    pushed_response_body_bytes; /*  9096
>> 	8 */
>> 	class TransactionMilestones milestones;          /*  9104   152 */
>> 	/* --- cacheline 144 boundary (9216 bytes) was 40 bytes ago --- */
>> 	int                        hooks_set;            /*  9256     4 */
>> 	TSHttpHookID               cur_hook_id;          /*  9260     4 */
>> 	class APIHook *            cur_hook;             /*  9264     8 */
>> 	int64_t                    prev_hook_start_time; /*  9272     8 */
>> 	/* --- cacheline 145 boundary (9280 bytes) --- */
>> 	int                        cur_hooks;            /*  9280     4 */
>> 	enum HttpApiState_t        callout_state;        /*  9284     4 */
>> 	class HttpAPIHooks        api_hooks;             /*  9288   264 */
>> 	/* --- cacheline 149 boundary (9536 bytes) was 16 bytes ago --- */
>> 	bool                       terminate_sm;         /*  9552     1 */
>> 	bool                       kill_this_async_done; /*  9553     1 */
>> 
>> 	/* XXX 6 bytes hole, try to pack */
>> 
>> 	struct Link<HttpSM>        debug_link;           /*  9560    16 */
>>        ...
>> 
>> 	/* vtable has 4 entries: {
>> 	   [2] = destroy(_ZN6HttpSM7destroyEv),
>> 	   [3] = handle_api_return(_ZN6HttpSM17handle_api_returnEv),
>> 	   [4] = set_next_state(_ZN6HttpSM14set_next_stateEv),
>> 	   [5] = kill_this_async_hook(_ZN6HttpSM20kill_this_async_hookEiPv),
>> 	} */
>> 	/* size: 9576, cachelines: 150, members: 62 */
>> 	/* sum members: 9522, holes: 14, sum holes: 54 */
>> 	/* last cacheline: 40 bytes */
>> };
>> 
>> 
> 
> -- 
> Igor Galić
> 
> Tel: +43 (0) 664 886 22 883
> Mail: i.galic@brainsware.org
> URL: http://brainsware.org/
> GPG: 6880 4155 74BD FD7C B515  2EA5 4B1D 9E08 A097 C9AE
> 


Re: struct member packing

Posted by Igor Galić <i....@brainsware.org>.

----- Original Message -----
> Hi all,
> 
> I came across pahole <http://linux.die.net/man/1/pahole>, and it's
> able to show some pretty interesting info about ATS internal
> structures. It looks like there's probably some opportunities to
> pack structure better, and probably improve cache utilization.
> 
> For example, HttpSM could be 54 bytes smaller:

Awesome! And, as always, I'm here to ask the uncomfortable questions
so no one else has to:

How is that going to fare with ABI compatibility?
How does this compare between 32bit and 64bit?
How does it compare between different platforms?

> ~/trafficserver.git$ pahole -C HttpSM ./proxy/.libs/traffic_server
> die__process_function: tag not supported (template_type_parameter)!
> die__process_class: tag not supported (template_value_parameter)!
> struct HttpSM : public Continuation {
> 	struct History {
> 		const char  *              fileline;             /*     0     8 */
> 		short unsigned int         event;                /*     8     2 */
> 		short int                  reentrancy;           /*    10     2 */
> 
> 		/* size: 16, cachelines: 1, members: 3 */
> 		/* last cacheline: 16 bytes */
> 
> 		/* BRAIN FART ALERT! 16 != 12 + 0(holes), diff = 4 */
> 
> 	};
> 
> 	class Link_debug_link : public Link<HttpSM> {
> 	public:
> 
> 		/* struct Link<HttpSM>        <ancestor>; */     /*     0    16 */
> 		class HttpSM * & next_link(class HttpSM *); /*
> 		linkage=_ZN6HttpSM15Link_debug_link9next_linkEPS_ */
> 
> 		class HttpSM * & prev_link(class HttpSM *); /*
> 		linkage=_ZN6HttpSM15Link_debug_link9prev_linkEPS_ */
> 
> 		const class HttpSM  * next_link(const class HttpSM  *); /*
> 		linkage=_ZN6HttpSM15Link_debug_link9next_linkEPKS_ */
> 
> 		const class HttpSM  * prev_link(const class HttpSM  *); /*
> 		linkage=_ZN6HttpSM15Link_debug_link9prev_linkEPKS_ */
> 
> 
> 		/* size: 16, cachelines: 1, members: 1 */
> 		/* last cacheline: 16 bytes */
> 	};
> 
> public:
> 
> 	/* struct Continuation        <ancestor>; */     /*     0    48 */
> 	int64_t                    sm_id;                /*    48     8 */
> 	unsigned int               magic;                /*    56     4 */
> 	bool                       enable_redirection;   /*    60     1 */
> 	bool                       api_enable_redirection; /*    61     1 */
> 
> 	/* XXX 2 bytes hole, try to pack */
> 
> 	/* --- cacheline 1 boundary (64 bytes) --- */
> 	char *                     redirect_url;         /*    64     8 */
> 	int                        redirect_url_len;     /*    72     4 */
> 	int                        redirection_tries;    /*    76     4 */
> 	int64_t                    transfered_bytes;     /*    80     8 */
> 	bool                       post_failed;          /*    88     1 */
> 	bool                       debug_on;             /*    89     1 */
> 
> 	/* XXX 2 bytes hole, try to pack */
> 
> 	enum HttpPluginTunnel_t    plugin_tunnel_type;   /*    92     4 */
> 	class PluginVCCore *       plugin_tunnel;        /*    96     8 */
> 	struct State               t_state;              /*   104  5920 */
> protected:
> 
> 	/* --- cacheline 94 boundary (6016 bytes) was 8 bytes ago --- */
> 	int                        reentrancy_count;     /*  6024     4 */
> 
> 	/* XXX 4 bytes hole, try to pack */
> 
> 	struct History             history[64];          /*  6032  1024 */
> 	/* --- cacheline 110 boundary (7040 bytes) was 16 bytes ago --- */
> 	int                        history_pos;          /*  7056     4 */
> 
> 	/* XXX 4 bytes hole, try to pack */
> 
> 	class HttpTunnel          tunnel;                /*  7064  1072 */
> 	/* --- cacheline 127 boundary (8128 bytes) was 8 bytes ago --- */
> 	struct HttpVCTable         vc_table;             /*  8136   288 */
> 	/* --- cacheline 131 boundary (8384 bytes) was 40 bytes ago --- */
> 	class HttpVCTableEntry *   ua_entry;             /*  8424     8 */
> 	class HttpClientSession *  ua_session;           /*  8432     8 */
> 	enum BackgroundFill_t      background_fill;      /*  8440     4 */
> 
> 	/* XXX 4 bytes hole, try to pack */
> 
> 	/* --- cacheline 132 boundary (8448 bytes) --- */
> 	class IOBufferReader *     ua_buffer_reader;     /*  8448     8 */
> 	class HttpVCTableEntry *   server_entry;         /*  8456     8 */
> 	class HttpServerSession *  server_session;       /*  8464     8 */
> 	int                        shared_session_retries; /*  8472     4 */
> 
> 	/* XXX 4 bytes hole, try to pack */
> 
> 	class IOBufferReader *     server_buffer_reader; /*  8480     8 */
> 	struct HttpTransformInfo   transform_info;       /*  8488    16 */
> 	struct HttpTransformInfo   post_transform_info;  /*  8504    16 */
> 	/* --- cacheline 133 boundary (8512 bytes) was 8 bytes ago --- */
> 	class HttpCacheSM         cache_sm;              /*  8520   184 */
> 	/* --- cacheline 136 boundary (8704 bytes) --- */
> 	class HttpCacheSM         transform_cache_sm;    /*  8704   184 */
> 	/* --- cacheline 138 boundary (8832 bytes) was 56 bytes ago --- */
> 	class HttpCacheSM *        second_cache_sm;      /*  8888     8 */
> 	/* --- cacheline 139 boundary (8896 bytes) --- */
> 	HttpSMHandler              default_handler;      /*  8896    16 */
> 	class Action *             pending_action;       /*  8912     8 */
> 	class Action *             historical_action;    /*  8920     8 */
> 	class Continuation *       schedule_cont;        /*  8928     8 */
> 	struct HTTPParser          http_parser;          /*  8936    48 */
> 	/* --- cacheline 140 boundary (8960 bytes) was 24 bytes ago --- */
> 	enum StateMachineAction_t  last_action;          /*  8984     4 */
> 
> 	/* XXX 4 bytes hole, try to pack */
> 
> 	struct {
> 		int                (*__pfn)(class HttpSM *, int, void *); /*  8992     8 */
> 		long int           __delta;              /*  9000     8 */
> 	} m_last_state;                                  /*  8992    16 */
> 	int                        client_request_hdr_bytes; /*  9008     4 */
> 
> 	/* XXX 4 bytes hole, try to pack */
> 
> 	int64_t                    client_request_body_bytes; /*  9016     8 */
> 	/* --- cacheline 141 boundary (9024 bytes) --- */
> 	int                        server_request_hdr_bytes; /*  9024     4 */
> 
> 	/* XXX 4 bytes hole, try to pack */
> 
> 	int64_t                    server_request_body_bytes; /*  9032     8 */
> 	int                        server_response_hdr_bytes; /*  9040     4 */
> 
> 	/* XXX 4 bytes hole, try to pack */
> 
> 	int64_t                    server_response_body_bytes; /*  9048     8 */
> 	int                        client_response_hdr_bytes; /*  9056     4 */
> 
> 	/* XXX 4 bytes hole, try to pack */
> 
> 	int64_t                    client_response_body_bytes; /*  9064     8 */
> 	int                        cache_response_hdr_bytes; /*  9072     4 */
> 
> 	/* XXX 4 bytes hole, try to pack */
> 
> 	int64_t                    cache_response_body_bytes; /*  9080     8 */
> 	/* --- cacheline 142 boundary (9088 bytes) --- */
> 	int                        pushed_response_hdr_bytes; /*  9088     4 */
> 
> 	/* XXX 4 bytes hole, try to pack */
> 
> 	int64_t                    pushed_response_body_bytes; /*  9096     8 */
> 	class TransactionMilestones milestones;          /*  9104   152 */
> 	/* --- cacheline 144 boundary (9216 bytes) was 40 bytes ago --- */
> 	int                        hooks_set;            /*  9256     4 */
> 	TSHttpHookID               cur_hook_id;          /*  9260     4 */
> 	class APIHook *            cur_hook;             /*  9264     8 */
> 	int64_t                    prev_hook_start_time; /*  9272     8 */
> 	/* --- cacheline 145 boundary (9280 bytes) --- */
> 	int                        cur_hooks;            /*  9280     4 */
> 	enum HttpApiState_t        callout_state;        /*  9284     4 */
> 	class HttpAPIHooks        api_hooks;             /*  9288   264 */
> 	/* --- cacheline 149 boundary (9536 bytes) was 16 bytes ago --- */
> 	bool                       terminate_sm;         /*  9552     1 */
> 	bool                       kill_this_async_done; /*  9553     1 */
> 
> 	/* XXX 6 bytes hole, try to pack */
> 
> 	struct Link<HttpSM>        debug_link;           /*  9560    16 */
>         ...
> 
> 	/* vtable has 4 entries: {
> 	   [2] = destroy(_ZN6HttpSM7destroyEv),
> 	   [3] = handle_api_return(_ZN6HttpSM17handle_api_returnEv),
> 	   [4] = set_next_state(_ZN6HttpSM14set_next_stateEv),
> 	   [5] = kill_this_async_hook(_ZN6HttpSM20kill_this_async_hookEiPv),
> 	} */
> 	/* size: 9576, cachelines: 150, members: 62 */
> 	/* sum members: 9522, holes: 14, sum holes: 54 */
> 	/* last cacheline: 40 bytes */
> };
> 
> 

-- 
Igor Galić

Tel: +43 (0) 664 886 22 883
Mail: i.galic@brainsware.org
URL: http://brainsware.org/
GPG: 6880 4155 74BD FD7C B515  2EA5 4B1D 9E08 A097 C9AE