You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@trafficserver.apache.org by we...@apache.org on 2013/12/04 04:39:03 UTC
[1/6] refine the codes of cluster
Updated Branches:
refs/heads/refine_cluster 7ffc10a9c -> 62504a9f8 (forced update)
http://git-wip-us.apache.org/repos/asf/trafficserver/blob/62504a9f/iocore/cluster/session.cc
----------------------------------------------------------------------
diff --git a/iocore/cluster/session.cc b/iocore/cluster/session.cc
new file mode 100644
index 0000000..7adead6
--- /dev/null
+++ b/iocore/cluster/session.cc
@@ -0,0 +1,1267 @@
+/** @file
+
+ Cluster session management: per-machine session tables, session create/bind/close, and queuing of incoming messages.
+
+ @section license License
+
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ */
+
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <sys/stat.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <sys/epoll.h>
+#include "Diags.h"
+#include "machine.h"
+#include "global.h"
+#include "connection.h"
+#include "clusterinterface.h"
+#include "nio.h"
+#ifndef TS_INLINE
+#define TS_INLINE inline
+#endif
+#include "I_IOBuffer.h"
+#include "P_Cluster.h"
+#include "P_RecCore.h"
+#include "session.h"
+
+// Shared allocator for InMessage objects, used only when USE_MULTI_ALLOCATOR
+// is not defined (otherwise each socket context supplies its own allocator).
+#ifndef USE_MULTI_ALLOCATOR
+static Allocator in_message_allocator("InMessage", sizeof(InMessage), 1024);
+#endif
+
+// Allocator for SessionEntry nodes that are chained behind a table slot.
+static Allocator session_allocator("SessionEntry", sizeof(SessionEntry), 1024);
+
+// Table of per-machine session state; a machine's slot is found by probing
+// forward from (ip % MAX_MACHINE_COUNT).
+static MachineSessions *all_sessions; //[src ip % MAX_MACHINE_COUNT]
+// Guards slot lookup/allocation and initialization in init_machine_sessions().
+static ink_mutex session_lock;
+// Index of the local machine's slot in all_sessions (set by session_init()).
+static int my_machine_id = 0;
+
+// Registered stat records that mirror the session counters; they are created
+// by init_session_stat() and refreshed by set_session_stat().
+struct SessionRecords {
+ RecRecord * create_total_count; //create session total count
+ RecRecord * create_success_count; //create session success count
+ RecRecord * create_retry_times; //create session retry times
+ RecRecord * close_total_count; //close session count
+ RecRecord * close_success_count; //close session success count
+ RecRecord * session_miss_count; //session miss count
+ RecRecord * session_occupied_count; //session occupied count
+};
+
+// Stat records for sessions opened by remote machines (server side) and for
+// sessions opened by this machine (client side); filled in by session_init().
+static SessionRecords server_session_records = {NULL, NULL, NULL, NULL, NULL, NULL, NULL};
+static SessionRecords client_session_records = {NULL, NULL, NULL, NULL, NULL, NULL, NULL};
+
+// Registers the stats named "<prefix>.<counter>" and stores the records.
+static void init_session_stat(SessionRecords *pSessionRecords, const char *prefix);
+
+/**
+ * Find the slot of all_sessions that holds the given ip.
+ *
+ * Probing starts at (ip % MAX_MACHINE_COUNT) and walks forward with
+ * wrap-around; the final probe revisits the starting slot.
+ *
+ * @param ip machine ip to look for
+ * @return slot index, or -1 when no slot holds this ip
+ */
+inline static int get_session_machine_index(const unsigned int ip)
+{
+  int home;
+  int offset;
+  int slot;
+
+  home = ip % MAX_MACHINE_COUNT;
+  for (offset = 0; offset <= MAX_MACHINE_COUNT; offset++) {
+    slot = (home + offset) % MAX_MACHINE_COUNT;
+    if (all_sessions[slot].ip == ip) {
+      return slot;
+    }
+  }
+
+  return -1;
+}
+
+/**
+ * Find an unused slot (ip == 0) of all_sessions for the given ip.
+ *
+ * Probing starts at (ip % MAX_MACHINE_COUNT) and walks forward with
+ * wrap-around. The slot is only located here; the caller claims it.
+ *
+ * @param ip machine ip that needs a slot
+ * @return index of a free slot, or -1 when the table is full
+ */
+static int alloc_session_machine_index(const unsigned int ip)
+{
+  int home;
+  int offset;
+  int slot;
+
+  home = ip % MAX_MACHINE_COUNT;
+  for (offset = 0; offset <= MAX_MACHINE_COUNT; offset++) {
+    slot = (home + offset) % MAX_MACHINE_COUNT;
+    if (all_sessions[slot].ip == 0) {
+      return slot;
+    }
+  }
+
+  return -1;
+}
+
+/**
+ * Account for and free an incoming message that has been delivered or dropped.
+ *
+ * Bumps the dequeue counters of the socket's thread context, clears the
+ * message's block reference and returns the InMessage to its allocator.
+ *
+ * @param pSockContext socket context whose thread stats (and, with
+ *        USE_MULTI_ALLOCATOR, whose allocator) are used
+ * @param pMessage the message to release
+ */
+inline static void release_in_message(SocketContext *pSockContext,
+ InMessage *pMessage)
+{
+ ink_atomic_increment(&pSockContext->thread_context->stats.
+ dequeue_in_msg_count, 1);
+ ink_atomic_increment(&pSockContext->thread_context->stats.
+ dequeue_in_msg_bytes, MSG_HEADER_LENGTH + pMessage->data_len);
+
+ pMessage->blocks = NULL; //free pointer
+#ifdef USE_MULTI_ALLOCATOR
+ pSockContext->in_msg_allocator->free_void(pMessage);
+#else
+ (void)pSockContext; // not needed for the free when the shared allocator is used
+ in_message_allocator.free_void(pMessage);
+#endif
+}
+
+/**
+ * Prepare the session table slot for one cluster machine.
+ *
+ * Finds (or claims) the machine's slot in all_sessions, then allocates the
+ * zeroed session array and the per-slot lock array. Calling it again for an
+ * already initialized machine is a no-op.
+ *
+ * On failure every partial allocation is released and the pointers are reset,
+ * so a later retry starts from a clean slot instead of leaking memory.
+ *
+ * @param machine the machine whose ip identifies the slot
+ * @param bMyself true when the machine is the local one
+ * @return 0 on success, ENOSPC when no slot is free, an errno value (or
+ *         ENOMEM) on allocation failure, or the ink_mutex_init() error code
+ */
+int init_machine_sessions(ClusterMachine *machine, const bool bMyself)
+{
+  int result;
+  int sessions_bytes;
+  int locks_bytes;
+  int machine_id;
+  MachineSessions *pMachineSessions;
+  ink_mutex *pLock;
+  ink_mutex *pLockEnd;
+
+  ink_mutex_acquire(&session_lock);
+  if ((machine_id=get_session_machine_index(machine->ip)) < 0) {
+    if ((machine_id=alloc_session_machine_index(machine->ip)) < 0) {
+      ink_mutex_release(&session_lock);
+      return ENOSPC;
+    }
+  }
+
+  pMachineSessions = all_sessions + machine_id;
+  if (pMachineSessions->init_done) {  //already init
+    ink_mutex_release(&session_lock);
+    return 0;
+  }
+
+  pMachineSessions->is_myself = bMyself;
+  pMachineSessions->ip = machine->ip;
+
+  sessions_bytes = sizeof(SessionEntry) * max_session_count_per_machine;
+  pMachineSessions->sessions = (SessionEntry *)malloc(sessions_bytes);
+  if (pMachineSessions->sessions == NULL) {
+    // capture errno before logging, which may overwrite it
+    result = errno != 0 ? errno : ENOMEM;
+    Error("file: "__FILE__", line: %d, "
+        "malloc %d bytes fail, errno: %d, error info: %s",
+        __LINE__, sessions_bytes, result, strerror(result));
+    ink_mutex_release(&session_lock);
+    return result;
+  }
+  memset(pMachineSessions->sessions, 0, sessions_bytes);
+
+  locks_bytes = sizeof(ink_mutex) * session_lock_count_per_machine;
+  pMachineSessions->locks = (ink_mutex *)malloc(locks_bytes);
+  if (pMachineSessions->locks == NULL) {
+    result = errno != 0 ? errno : ENOMEM;
+    Error("file: "__FILE__", line: %d, "
+        "malloc %d bytes fail, errno: %d, error info: %s",
+        __LINE__, locks_bytes, result, strerror(result));
+    // do not leak the session array allocated above
+    free(pMachineSessions->sessions);
+    pMachineSessions->sessions = NULL;
+    ink_mutex_release(&session_lock);
+    return result;
+  }
+
+  pLockEnd = pMachineSessions->locks + session_lock_count_per_machine;
+  for (pLock=pMachineSessions->locks; pLock<pLockEnd; pLock++) {
+    if ((result=ink_mutex_init(pLock, "session_locks")) != 0) {
+      // release both arrays so that a retry does not overwrite live pointers
+      free(pMachineSessions->locks);
+      pMachineSessions->locks = NULL;
+      free(pMachineSessions->sessions);
+      pMachineSessions->sessions = NULL;
+      ink_mutex_release(&session_lock);
+      return result;
+    }
+  }
+
+  pMachineSessions->init_done = true;
+  ink_mutex_release(&session_lock);
+  return 0;
+}
+
+/**
+ * One-time setup of the session subsystem.
+ *
+ * Allocates the zeroed machine table, initializes session_lock, sets up the
+ * local machine's slot and registers the server/client session stats.
+ *
+ * session_lock is initialized BEFORE init_machine_sessions() is called,
+ * because that function acquires the lock.
+ *
+ * @return 0 on success, an errno value (or ENOMEM) when the table cannot be
+ *         allocated, or the error from ink_mutex_init() /
+ *         init_machine_sessions()
+ */
+int session_init()
+{
+  int bytes;
+  int result;
+  ClusterMachine *myMachine;
+
+  bytes = sizeof(MachineSessions) * MAX_MACHINE_COUNT;
+  all_sessions = (MachineSessions *)malloc(bytes);
+  if (all_sessions == NULL) {
+    // capture errno before logging, which may overwrite it
+    result = errno != 0 ? errno : ENOMEM;
+    Error("file: "__FILE__", line: %d, "
+        "malloc %d bytes fail, errno: %d, error info: %s",
+        __LINE__, bytes, result, strerror(result));
+    return result;
+  }
+  memset(all_sessions, 0, bytes);
+
+  // the lock must exist before init_machine_sessions() acquires it
+  if ((result=ink_mutex_init(&session_lock, "session_lock")) != 0) {
+    return result;
+  }
+
+  myMachine = cluster_machines + 0;
+  if ((result=init_machine_sessions(myMachine, true)) != 0) {
+    return result;
+  }
+
+  my_machine_id = get_session_machine_index(myMachine->ip);
+  Debug(CLUSTER_DEBUG_TAG, "my_machine_id: %d", my_machine_id);
+
+  init_session_stat(&server_session_records, "proxy.process.cluster.server_session");
+  init_session_stat(&client_session_records, "proxy.process.cluster.client_session");
+
+  return 0;
+}
+
+/**
+ * Create a new session owned by this machine.
+ *
+ * Draws up to 128 sequence numbers; each one selects a slot of the local
+ * session table. The first slot that is empty (checked once without the slot
+ * lock and again under it) is claimed.
+ *
+ * @param session receives the id of the new session
+ * @param machine peer machine; its socket context is bound to the session
+ * @param arg user data stored with the session
+ * @param events initial response event mask
+ * @return 0 on success, ENOENT when the machine has no socket context,
+ *         ENOSPC when no empty slot was found
+ */
+int cluster_create_session(ClusterSession *session,
+    const ClusterMachine *machine, void *arg, const int events)
+{
+  MachineSessions *pMachineSessions;
+  SessionEntry *pEntry;
+  SocketContext *pSockContext;
+  int attempt;
+  int slot;
+  int version;
+  SequenceType seq;
+
+  pMachineSessions = all_sessions + my_machine_id;
+
+  ink_atomic_increment(&pMachineSessions->session_stat.create_total_count, 1);
+
+  pSockContext = get_socket_context(machine);
+  if (pSockContext == NULL) {
+    return ENOENT;
+  }
+  version = pSockContext->version;
+
+  for (attempt=0; attempt<128; attempt++) {
+    seq = ink_atomic_increment(&pMachineSessions->current_seq, 1);
+    slot = seq % max_session_count_per_machine;
+    pEntry = pMachineSessions->sessions + slot;
+
+    // cheap unlocked pre-check; skip slots that are visibly in use
+    if (!(IS_SESSION_EMPTY(pEntry->session_id))) {
+      continue;
+    }
+
+    SESSION_LOCK(pMachineSessions, slot);
+    if (!(IS_SESSION_EMPTY(pEntry->session_id))) {
+      // somebody claimed the slot between the pre-check and the lock
+      SESSION_UNLOCK(pMachineSessions, slot);
+      continue;
+    }
+
+    pEntry->session_id.fields.ip = my_machine_ip;
+    pEntry->session_id.fields.timestamp = CURRENT_TIME();
+    pEntry->session_id.fields.seq = seq;
+    pEntry->sock_context = pSockContext;
+    pEntry->user_data = arg;
+    pEntry->response_events = events;
+    pEntry->current_msg_seq = 0;
+    pEntry->version = version;
+
+    *session = pEntry->session_id;
+
+#ifdef TRIGGER_STAT_FLAG
+    if (pEntry->response_events & RESPONSE_EVENT_NOTIFY_DEALER) {
+      pEntry->stat_start_time = CURRENT_NS();
+    }
+#endif
+    SESSION_UNLOCK(pMachineSessions, slot);
+
+    ink_atomic_increment(&pMachineSessions->session_stat.
+        create_success_count, 1);
+    ink_atomic_increment(&pMachineSessions->session_stat.
+        create_retry_times, attempt + 1);
+    return 0;
+  }
+
+  // every attempt hit an occupied slot
+  ink_atomic_increment(&pMachineSessions->session_stat.
+      create_retry_times, attempt);
+
+  return ENOSPC;
+}
+
+/*
+ * Resolve `ip` to its slot in all_sessions.
+ * On success `machine_id` holds the slot index and `pMachineSessions` points
+ * at the slot. When the ip is unknown or its slot is not initialized yet, the
+ * macro logs a debug message and RETURNS `return_value` from the enclosing
+ * function. Note that `ip` is evaluated more than once.
+ */
+#define GET_MACHINE_INDEX(machine_id, ip, pMachineSessions, return_value) \
+ do { \
+ if ((machine_id=get_session_machine_index(ip)) < 0) { \
+ Debug(CLUSTER_DEBUG_TAG, "file: "__FILE__", line: %d, " \
+ "ip: %u not exist!", __LINE__, ip); \
+ return return_value; \
+ } \
+ pMachineSessions = all_sessions + machine_id; \
+ if (!(pMachineSessions)->init_done) { \
+ Debug(CLUSTER_DEBUG_TAG, "file: "__FILE__", line: %d, " \
+ "ip: %u not init!", __LINE__, ip); \
+ return return_value; \
+ } \
+ } while (0)
+
+
+/**
+ * Search a slot's chain for the entry with the given session id.
+ *
+ * @param session_id id to look for
+ * @param pSession head of the chain; must not be NULL
+ * @return the matching entry, or NULL when the chain does not contain the id
+ */
+inline static SessionEntry *get_session(
+    const ClusterSession *session_id, SessionEntry *pSession)
+{
+  SessionEntry *pNode = pSession;
+
+  for (;;) {
+    if (IS_SESSION_EQUAL(pNode->session_id, *session_id)) {
+      return pNode;
+    }
+
+    pNode = pNode->next;
+    if (pNode == NULL) {
+      return NULL;
+    }
+  }
+}
+
+/**
+ * Attach user data to an existing session.
+ *
+ * @param session id of the session
+ * @param arg user data to store in the session entry
+ * @return 0 on success, ENOENT when the machine or the session is unknown
+ */
+int cluster_bind_session(ClusterSession session, void *arg)
+{
+  MachineSessions *pMachineSessions;
+  SessionEntry *pEntry;
+  int machine_id;
+  int slot;
+  int result;
+
+  GET_MACHINE_INDEX(machine_id, session.fields.ip, pMachineSessions, ENOENT);
+
+  slot = session.fields.seq % max_session_count_per_machine;
+
+  SESSION_LOCK(pMachineSessions, slot);
+  pEntry = get_session(&session, pMachineSessions->sessions + slot);
+  if (pEntry == NULL) {
+    result = ENOENT;
+  }
+  else {
+    pEntry->user_data = arg;
+    result = 0;
+  }
+  SESSION_UNLOCK(pMachineSessions, slot);
+
+  return result;
+}
+
+/**
+ * Set the response event mask of a session.
+ *
+ * When RESPONSE_EVENT_NOTIFY_DEALER is requested and a message is already
+ * queued on the session, one message is popped and delivered through
+ * cluster_msg_deal_func() right away (after the slot lock is dropped);
+ * otherwise the mask is stored so that a later message triggers the callback.
+ *
+ * @param session id of the session
+ * @param events new response event mask
+ * @return 0 on success, ENOENT when the machine, the session or the session's
+ *         socket context is missing
+ */
+int cluster_set_events(ClusterSession session, const int events)
+{
+ SessionEntry *pSessionEntry;
+ MachineSessions *pMachineSessions;
+ SocketContext *pSockContext;
+ InMessage *pMessage;
+ void *user_data;
+ int result;
+ int machine_id;
+ int session_index;
+
+ GET_MACHINE_INDEX(machine_id, session.fields.ip, pMachineSessions, ENOENT);
+
+ session_index = session.fields.seq % max_session_count_per_machine;
+ pSessionEntry = pMachineSessions->sessions + session_index;
+ SESSION_LOCK(pMachineSessions, session_index);
+
+ if ((pSessionEntry=get_session(&session, pSessionEntry)) != NULL) {
+ pSockContext = pSessionEntry->sock_context;
+ if (pSockContext != NULL) {
+ if (events & RESPONSE_EVENT_NOTIFY_DEALER) {
+
+ //assert((pSessionEntry->response_events & RESPONSE_EVENT_NOTIFY_DEALER) == 0);
+
+#ifdef TRIGGER_STAT_FLAG
+ //for stat
+ if (pMachineSessions->is_myself) { //client
+ pSessionEntry->stat_start_time = CURRENT_NS();
+ }
+ else { //server
+ if (pSessionEntry->stat_start_time != 0) {
+ ink_atomic_increment(&pMachineSessions->trigger_stat.count, 1);
+ ink_atomic_increment(&pMachineSessions->trigger_stat.time_used,
+ CURRENT_NS() - pSessionEntry->stat_start_time);
+ pSessionEntry->stat_start_time = 0;
+ }
+ }
+#endif
+
+ // Nothing queued: remember the mask. Otherwise pop the head message for
+ // delivery below; response_events is left unchanged in that case.
+ pMessage = pSessionEntry->messages;
+ if (pMessage == NULL) {
+ pSessionEntry->response_events = events; //waiting for message to notify
+ }
+ else {
+ pSessionEntry->messages = pSessionEntry->messages->next; //consume one
+ }
+ }
+ else {
+ pMessage = NULL;
+ pSessionEntry->response_events = events;
+ }
+
+ user_data = pSessionEntry->user_data;
+ result = 0;
+ }
+ else {
+ // entry matched but carries no socket context
+ pMessage = NULL;
+ user_data = NULL;
+ result = ENOENT;
+ }
+ }
+ else {
+ // session id not present in this slot's chain
+ pSockContext = NULL;
+ pMessage = NULL;
+ user_data = NULL;
+ result = ENOENT;
+ }
+
+#ifdef TRIGGER_STAT_FLAG
+ // pMessage != NULL implies pSessionEntry is a valid, matched entry
+ if (pMessage != NULL) {
+ if (!pMachineSessions->is_myself) { //server
+ pSessionEntry->stat_start_time = CURRENT_NS();
+ }
+ }
+#endif
+ SESSION_UNLOCK(pMachineSessions, session_index);
+
+ // deliver the popped message outside of the slot lock, then free it
+ if (pMessage != NULL) {
+ cluster_msg_deal_func(session, user_data, pMessage->func_id,
+ pMessage->blocks, pMessage->data_len);
+ release_in_message(pSockContext, pMessage);
+ }
+
+ return result;
+}
+
+/**
+ * Close a session.
+ *
+ * Releases all messages still queued on the session, clears the entry and
+ * unlinks it from its slot chain.
+ *
+ * @param session id of the session to close
+ * @return the user data that was stored in the session, or NULL when the
+ *         machine or the session is not found
+ */
+void *cluster_close_session(ClusterSession session)
+{
+ void *old_data;
+ SessionEntry *previous;
+ SessionEntry *pSessionEntry;
+ MachineSessions *pMachineSessions;
+ InMessage *pMessage;
+ int machine_id;
+ int session_index;
+
+ GET_MACHINE_INDEX(machine_id, session.fields.ip, pMachineSessions, NULL);
+
+ ink_atomic_increment(&pMachineSessions->session_stat.close_total_count, 1);
+
+ session_index = session.fields.seq % max_session_count_per_machine;
+ pSessionEntry = pMachineSessions->sessions + session_index;
+ SESSION_LOCK(pMachineSessions, session_index);
+
+ // walk the chain, remembering the predecessor for the unlink below;
+ // only entries that still have a socket context can match
+ previous = NULL;
+ do {
+ if (pSessionEntry->sock_context != NULL && IS_SESSION_EQUAL(
+ session, pSessionEntry->session_id))
+ {
+ break;
+ }
+
+ previous = pSessionEntry;
+ pSessionEntry = pSessionEntry->next;
+ } while (pSessionEntry != NULL);
+
+ if (pSessionEntry != NULL) { //found
+ old_data = pSessionEntry->user_data;
+ // drop every message that was never delivered
+ while (pSessionEntry->messages != NULL) {
+ pMessage = pSessionEntry->messages;
+ pSessionEntry->messages = pSessionEntry->messages->next;
+
+ release_in_message(pSessionEntry->sock_context, pMessage);
+ }
+ pSessionEntry->sock_context = NULL;
+ pSessionEntry->response_events = 0;
+ pSessionEntry->user_data = NULL;
+ CLEAR_SESSION(pSessionEntry->session_id);
+
+#ifdef TRIGGER_STAT_FLAG
+ if (pSessionEntry->stat_start_time != 0) {
+ ink_atomic_increment(&pMachineSessions->trigger_stat.count, 1);
+ ink_atomic_increment(&pMachineSessions->trigger_stat.time_used,
+ CURRENT_NS() - pSessionEntry->stat_start_time);
+ pSessionEntry->stat_start_time = 0;
+ }
+#endif
+
+ ink_atomic_increment(&pMachineSessions->session_stat.
+ close_success_count, 1);
+
+#ifdef MSG_TIME_STAT_FLAG
+ if (pMachineSessions->is_myself)
+ {//request by me
+ if (pSessionEntry->client_start_time != 0) {
+ ink_atomic_increment(&pMachineSessions->msg_stat.count, 1);
+ ink_atomic_increment(&pMachineSessions->msg_stat.time_used,
+ CURRENT_NS() - pSessionEntry->client_start_time);
+ pSessionEntry->client_start_time = 0;
+ }
+ }
+ else { //request by other
+ if (pSessionEntry->server_start_time != 0) {
+ ink_atomic_increment(&pMachineSessions->msg_stat.count, 1);
+ ink_atomic_increment(&pMachineSessions->msg_stat.time_used,
+ CURRENT_NS() - pSessionEntry->server_start_time);
+ pSessionEntry->server_start_time = 0;
+ }
+ }
+
+ if (pSessionEntry->send_start_time != 0) {
+ ink_atomic_increment(&pMachineSessions->msg_send.count, 1);
+ ink_atomic_increment(&pMachineSessions->msg_send.time_used,
+ CURRENT_NS() - pSessionEntry->send_start_time);
+ pSessionEntry->send_start_time = 0;
+ }
+#endif
+
+ // The head entry is an element of the sessions array and is never freed:
+ // if it has a successor, the successor is copied over it and the
+ // successor's node is returned to the allocator instead.
+ if (previous == NULL) { //remove the head session
+ SessionEntry *pNextSession;
+ pNextSession = pSessionEntry->next;
+ if (pNextSession != NULL) {
+ memcpy(pSessionEntry, pNextSession, sizeof(SessionEntry));
+ session_allocator.free_void(pNextSession);
+ }
+ }
+ else {
+ // chained entry: unlink it and give the node back to the allocator
+ previous->next = pSessionEntry->next;
+ session_allocator.free_void(pSessionEntry);
+ }
+ }
+ else {
+ old_data = NULL;
+ }
+ SESSION_UNLOCK(pMachineSessions, session_index);
+ return old_data;
+}
+
+/**
+ * Look up a session entry that is about to be used for sending.
+ *
+ * @param session id of the session
+ * @param ppMachineSessions receives the machine slot that owns the session
+ * @param sessionEntry receives the entry on success, NULL otherwise
+ * @return 0 on success, ENOENT when the machine or the session is unknown,
+ *         EBUSY when received messages are still queued on the session
+ */
+int get_session_for_send(const SessionId *session,
+    MachineSessions **ppMachineSessions, SessionEntry **sessionEntry)
+{
+  SessionEntry *pFound;
+  int machine_id;
+  int slot;
+  int result;
+
+  GET_MACHINE_INDEX(machine_id, session->fields.ip, *ppMachineSessions, ENOENT);
+
+  slot = session->fields.seq % max_session_count_per_machine;
+
+  SESSION_LOCK(*ppMachineSessions, slot);
+  pFound = get_session(session, (*ppMachineSessions)->sessions + slot);
+  if (pFound == NULL) {
+    result = ENOENT;
+  }
+  else if (pFound->messages != NULL) {  //you must consume the recv messages firstly
+    pFound = NULL;
+    result = EBUSY;
+  }
+  else {
+    result = 0;
+  }
+  *sessionEntry = pFound;
+  SESSION_UNLOCK(*ppMachineSessions, slot);
+
+  return result;
+}
+
+#ifdef MSG_TIME_STAT_FLAG
+/**
+ * Locate the session entry a message header refers to, without creating or
+ * modifying any entry.
+ *
+ * @param pHeader header of the message
+ * @param ppMachineSessions receives the machine slot of the session's ip
+ * @param sessionEntry receives the entry, or NULL on failure
+ * @return 0 when an entry was found (or when the empty head entry may be used
+ *         for the first message of a remote session), ENOENT when the session
+ *         does not exist, EEXIST when the head entry is taken by another
+ *         session
+ */
+int get_response_session_internal(const MsgHeader *pHeader,
+    MachineSessions **ppMachineSessions, SessionEntry **sessionEntry)
+{
+  SessionEntry *pHead;
+  SessionEntry *pMatch;
+  int result;
+  int machine_id;
+  int slot;
+
+  GET_MACHINE_INDEX(machine_id, pHeader->session_id.fields.ip,
+      *ppMachineSessions, ENOENT);
+
+  slot = pHeader->session_id.fields.seq % max_session_count_per_machine;
+  pHead = (*ppMachineSessions)->sessions + slot;
+  SESSION_LOCK(*ppMachineSessions, slot);
+
+  // pHead is an array element and therefore never NULL
+  for (pMatch=pHead; pMatch!=NULL; pMatch=pMatch->next) {
+    if (IS_SESSION_EQUAL(pMatch->session_id, pHeader->session_id)) {
+      break;
+    }
+  }
+
+  if (pMatch != NULL) {
+    *sessionEntry = pMatch;
+    result = 0;
+  }
+  else if ((*ppMachineSessions)->is_myself) {  //request by me
+    *sessionEntry = NULL;
+    result = ENOENT;
+  }
+  else if (!(IS_SESSION_EMPTY(pHead->session_id))) {
+    // head entry belongs to a different session
+    *sessionEntry = NULL;
+    result = EEXIST;
+  }
+  else if (pHeader->msg_seq == 1) {  //first time, should create
+    *sessionEntry = pHead;
+    result = 0;
+  }
+  else {
+    *sessionEntry = NULL;
+    result = ENOENT;
+  }
+
+  SESSION_UNLOCK(*ppMachineSessions, slot);
+  return result;
+}
+#endif
+
+/**
+ * Find, or for the first message of a remote session create, the session
+ * entry that an incoming message belongs to.
+ *
+ * @param pHeader header of the incoming message
+ * @param ppMachineSessions receives the machine slot of the session's ip
+ * @param sessionEntry receives the entry, or NULL on failure
+ * @param pSocketContext socket the message arrived on; stored in a newly
+ *        created entry
+ * @param call_func receives true when the caller should invoke the message
+ *        handler directly (a new entry was created, or the existing entry had
+ *        RESPONSE_EVENT_NOTIFY_DEALER set, which is cleared here)
+ * @param user_data receives the entry's user data (NULL for new entries and
+ *        on failure)
+ * @return 0 on success, ENOENT when the session does not exist and may not be
+ *         created, EEXIST when a locally owned session's slot is occupied by
+ *         a different session
+ */
+int get_response_session(const MsgHeader *pHeader,
+ MachineSessions **ppMachineSessions, SessionEntry **sessionEntry,
+ SocketContext *pSocketContext, bool *call_func, void **user_data)
+{
+ SessionEntry *pSession;
+ SessionEntry *pTail;
+ SessionEntry *pCurrent;
+ int result;
+ int machine_id;
+ int session_index;
+ int chain_count;
+
+ GET_MACHINE_INDEX(machine_id, pHeader->session_id.fields.ip,
+ *ppMachineSessions, ENOENT);
+
+ session_index = pHeader->session_id.fields.seq % max_session_count_per_machine;
+ pSession = (*ppMachineSessions)->sessions + session_index;
+ SESSION_LOCK(*ppMachineSessions, session_index);
+ // single-pass do/while(0): each outcome leaves via break, only the
+ // "slot occupied" case falls through to the end of the body
+ do {
+ // 1) look for an existing entry with this session id
+ pCurrent = pSession;
+ do {
+ if (IS_SESSION_EQUAL(pCurrent->session_id, pHeader->session_id)) {
+ *sessionEntry = pCurrent;
+ *user_data = pCurrent->user_data;
+ result = 0;
+
+ if (pCurrent->response_events & RESPONSE_EVENT_NOTIFY_DEALER) {
+ pCurrent->response_events = 0;
+ *call_func = true;
+ }
+ else {
+ *call_func = false;
+ }
+
+ break;
+ }
+
+ pCurrent = pCurrent->next;
+ } while (pCurrent != NULL);
+
+ if (pCurrent != NULL) { //found
+ pSession = pCurrent;
+ break;
+ }
+
+ // 2) not found
+ if ((*ppMachineSessions)->is_myself) { //request by me
+ if (IS_SESSION_EMPTY(pSession->session_id)) {
+ Debug(CLUSTER_DEBUG_TAG, "file: "__FILE__", line: %d, "
+ "client sessionEntry: %16lX:%lX not exist, func_id: %d",
+ __LINE__, pHeader->session_id.ids[0],
+ pHeader->session_id.ids[1], pHeader->func_id);
+ *sessionEntry = NULL;
+ *call_func = false;
+ *user_data = NULL;
+ result = ENOENT;
+
+ ink_atomic_increment(&(*ppMachineSessions)->session_stat.
+ session_miss_count, 1);
+ break;
+ }
+ // head is in use by another session: falls through to EEXIST below
+ }
+ else { //request by other
+ if (pHeader->msg_seq > 1) { //should discard the message
+ *sessionEntry = NULL;
+ *user_data = NULL;
+ *call_func = false;
+ result = ENOENT;
+
+ Debug(CLUSTER_DEBUG_TAG, "file: "__FILE__", line: %d, "
+ "server sessionEntry: %08X:%u:%"PRId64" not exist, msg seq: %u, "
+ "func_id: %d, data_len: %d",
+ __LINE__, pHeader->session_id.fields.ip,
+ pHeader->session_id.fields.timestamp,
+ pHeader->session_id.ids[1], pHeader->msg_seq,
+ pHeader->func_id, pHeader->data_len);
+
+ ink_atomic_increment(&(*ppMachineSessions)->session_stat.
+ session_miss_count, 1);
+ break;
+ }
+
+ // Reuse the empty head entry, or walk to the tail of the chain (counting
+ // the entries) and allocate a new node to append behind it.
+ if (IS_SESSION_EMPTY(pSession->session_id)) {
+ pTail = NULL;
+ chain_count = 0;
+ }
+ else {
+ chain_count = 1;
+ pTail = pSession;
+ if (pSession->next != NULL) {
+ ++chain_count;
+ pTail = pSession->next;
+ pSession = pTail->next;
+ while (pSession != NULL) {
+ pTail = pSession;
+ pSession = pSession->next;
+ ++chain_count;
+ }
+ }
+
+ pSession = (SessionEntry *)session_allocator.alloc_void();
+ pSession->messages = NULL;
+ pSession->user_data = NULL;
+ pSession->next = NULL;
+
+#ifdef TRIGGER_STAT_FLAG
+ pSession->stat_start_time = 0;
+#endif
+#ifdef MSG_TIME_STAT_FLAG
+ pSession->client_start_time = 0;
+ pSession->server_start_time = 0;
+ pSession->send_start_time = 0;
+#endif
+ }
+
+ //first time, should create
+ pSession->session_id = pHeader->session_id; //set sessionEntry id
+ pSession->sock_context = pSocketContext;
+ pSession->version = pSocketContext->version;
+ pSession->response_events = 0;
+ pSession->current_msg_seq = 0;
+ if (pTail != NULL) {
+ pTail->next = pSession;
+
+ Debug(CLUSTER_DEBUG_TAG, "file: "__FILE__", line: %d, "
+ "sessionEntry: %08X:%u:%"PRId64", chain count: %d",
+ __LINE__, pHeader->session_id.fields.ip,
+ pHeader->session_id.fields.timestamp,
+ pHeader->session_id.ids[1], chain_count + 1);
+ }
+
+ *sessionEntry = pSession;
+ *user_data = NULL;
+ *call_func = true;
+ result = 0;
+
+ ink_atomic_increment(&(*ppMachineSessions)->session_stat.
+ create_total_count, 1);
+ break;
+ }
+
+ // 3) only reached for a locally owned session whose head entry holds a
+ // different session id
+ Debug(CLUSTER_DEBUG_TAG, "file: "__FILE__", line: %d, "
+ "sessionEntry: %08X:%u:%"PRId64", position occupied by %08X:%u:%"PRId64", "
+ "quest by me: %d, time distance: %u, func_id: %d",
+ __LINE__, pHeader->session_id.fields.ip,
+ pHeader->session_id.fields.timestamp, pHeader->session_id.ids[1],
+ pSession->session_id.fields.ip, pSession->session_id.fields.timestamp,
+ pSession->session_id.ids[1], machine_id == my_machine_id,
+ pHeader->session_id.fields.timestamp -
+ pSession->session_id.fields.timestamp, pHeader->func_id);
+ *sessionEntry = NULL;
+ *user_data = NULL;
+ *call_func = false;
+ result = EEXIST;
+
+ ink_atomic_increment(&(*ppMachineSessions)->session_stat.
+ session_occupied_count, 1);
+ } while (0);
+
+#ifdef TRIGGER_STAT_FLAG
+ // *call_func is only true on the paths where pSession is the matched or
+ // newly created entry
+ if (*call_func) {
+ //stat
+ if ((*ppMachineSessions)->is_myself) { //request by me
+ if (pSession->stat_start_time != 0) {
+ ink_atomic_increment(&(*ppMachineSessions)->trigger_stat.count, 1);
+ ink_atomic_increment(&(*ppMachineSessions)->trigger_stat.time_used,
+ CURRENT_NS() - pSession->stat_start_time);
+ pSession->stat_start_time = 0;
+ }
+ }
+ else {
+ pSession->stat_start_time = CURRENT_NS();
+ }
+ }
+#endif
+
+ SESSION_UNLOCK(*ppMachineSessions, session_index);
+ return result;
+}
+
+/**
+ * Notify every session of one machine slot that uses the given socket that
+ * the connection has been closed.
+ *
+ * A session that is waiting for a message (RESPONSE_EVENT_NOTIFY_DEALER set
+ * and nothing queued) gets FUNC_ID_CONNECTION_CLOSED_NOTIFY delivered
+ * directly; for any other session the notification goes through
+ * push_in_message().
+ *
+ * @param src_machine_id index into all_sessions of the slot to scan
+ * @param pSockContext the socket context that was closed
+ * @return number of sessions that were notified
+ */
+static int do_notify_connection_closed(const int src_machine_id,
+ SocketContext *pSockContext)
+{
+ int count;
+ int session_index;
+ SessionEntry *pcurrent;
+ SessionEntry *pSessionEntry;
+ SessionEntry *pSessionEnd;
+ void *user_data;
+ bool call_func;
+ SessionId session_id;
+
+ count = 0;
+ pSessionEnd = all_sessions[src_machine_id].sessions +
+ max_session_count_per_machine;
+ for (pSessionEntry=all_sessions[src_machine_id].sessions;
+ pSessionEntry<pSessionEnd; pSessionEntry++)
+ {
+ pcurrent = pSessionEntry;
+ do {
+ // NOTE(review): sock_context is compared before the slot lock is taken;
+ // confirm this unlocked read is acceptable.
+ if (pcurrent->sock_context == pSockContext) {
+ session_index = pSessionEntry - all_sessions[src_machine_id].sessions;
+ // snapshot the fields needed for the notification under the slot lock
+ SESSION_LOCK(all_sessions + src_machine_id, session_index);
+ call_func = (pcurrent->response_events &
+ RESPONSE_EVENT_NOTIFY_DEALER) && (pcurrent->messages == NULL);
+ session_id = pcurrent->session_id;
+ user_data = pcurrent->user_data;
+ SESSION_UNLOCK(all_sessions + src_machine_id, session_index);
+
+ if (call_func) {
+ cluster_msg_deal_func(session_id, user_data,
+ FUNC_ID_CONNECTION_CLOSED_NOTIFY, NULL, 0);
+ }
+ else {
+ push_in_message(session_id, all_sessions + src_machine_id,
+ pcurrent, FUNC_ID_CONNECTION_CLOSED_NOTIFY, NULL, 0);
+ }
+
+ count++;
+ }
+
+ // NOTE(review): pcurrent is dereferenced after the callback ran; if the
+ // handler can close the session, a chained node may already have been
+ // freed - confirm against cluster_msg_deal_func.
+ pcurrent = pcurrent->next;
+ } while (pcurrent != NULL);
+ }
+
+ return count;
+}
+
+/**
+ * Tell all sessions bound to a closed socket that the connection is gone.
+ *
+ * Scans the local machine's session table and, when it is initialized, the
+ * table of the machine at the other end of the socket.
+ *
+ * @param pSockContext the socket context that was closed
+ * @return total number of sessions that were notified
+ */
+int notify_connection_closed(SocketContext *pSockContext)
+{
+  int my_count;
+  int peer_count;
+  int peer_id;
+
+  my_count = do_notify_connection_closed(my_machine_id, pSockContext);
+  if (my_count > 0) {
+    Debug(CLUSTER_DEBUG_TAG, "file: "__FILE__", line: %d, "
+        "notify my session close count: %d", __LINE__, my_count);
+  }
+
+  peer_count = 0;
+  peer_id = get_session_machine_index(pSockContext->machine->ip);
+  if (peer_id >= 0 && all_sessions[peer_id].init_done) {
+    peer_count = do_notify_connection_closed(peer_id, pSockContext);
+    if (peer_count > 0) {
+      Debug(CLUSTER_DEBUG_TAG, "file: "__FILE__", line: %d, "
+          "notify %s session close count: %d", __LINE__,
+          pSockContext->machine->hostname, peer_count);
+    }
+  }
+
+  return my_count + peer_count;
+}
+
+/**
+ * Queue an incoming message on a session and, when the session is waiting
+ * for one, deliver the oldest queued message immediately.
+ *
+ * The message is appended to the tail of the session's queue under the slot
+ * lock. If RESPONSE_EVENT_NOTIFY_DEALER is set on the session, the flag is
+ * cleared, the head of the queue is popped and - after the lock has been
+ * dropped - handed to cluster_msg_deal_func() and released.
+ *
+ * @param session id the entry is expected to carry
+ * @param pMachineSessions machine slot that owns the entry
+ * @param pSessionEntry the session entry to queue on
+ * @param func_id function id of the message
+ * @param blocks message payload (may be NULL)
+ * @param data_len payload length in bytes
+ * @return 0 on success, ENOENT when the entry no longer belongs to `session`
+ *         or has no socket context, an errno value (or ENOMEM) when the
+ *         message object cannot be allocated
+ */
+int push_in_message(const SessionId session,
+    MachineSessions *pMachineSessions, SessionEntry *pSessionEntry,
+    const int func_id, IOBufferBlock *blocks, const int data_len)
+{
+  SocketContext *pSockContext;
+  InMessage *pMessage;
+  InMessage **ppTail;
+  void *user_data;
+  int session_index;
+  int result;
+  bool call_func;
+
+  session_index = session.fields.seq % max_session_count_per_machine;
+  SESSION_LOCK(pMachineSessions, session_index);
+  pSockContext = pSessionEntry->sock_context;
+  if (!(pSockContext != NULL && IS_SESSION_EQUAL(pSessionEntry->session_id,
+          session)))
+  {
+    SESSION_UNLOCK(pMachineSessions, session_index);
+    return ENOENT;
+  }
+
+#ifdef USE_MULTI_ALLOCATOR
+  pMessage = (InMessage *)pSockContext->in_msg_allocator->alloc_void();
+#else
+  pMessage = (InMessage *)in_message_allocator.alloc_void();
+#endif
+
+  if (pMessage == NULL) {
+    // capture errno before logging, which may overwrite it
+    result = errno != 0 ? errno : ENOMEM;
+    Error("file: "__FILE__", line: %d, "
+        "malloc %d bytes fail, errno: %d, error info: %s",
+        __LINE__, (int)sizeof(InMessage), result, strerror(result));
+    SESSION_UNLOCK(pMachineSessions, session_index);
+    return result;
+  }
+
+  pMessage->blocks.m_ptr = NULL;  //must set to NULL before set value
+  pMessage->func_id = func_id;
+  pMessage->blocks = blocks;
+  pMessage->data_len = data_len;
+  pMessage->next = NULL;
+
+  // append at the tail of the queue
+  ppTail = &pSessionEntry->messages;
+  while (*ppTail != NULL) {
+    ppTail = &(*ppTail)->next;
+  }
+  *ppTail = pMessage;
+
+  //check if notify dealer
+  if (pSessionEntry->response_events & RESPONSE_EVENT_NOTIFY_DEALER) {
+    pSessionEntry->response_events = 0;
+    // deliver the OLDEST queued message, which is not necessarily the new one
+    pMessage = pSessionEntry->messages;
+    pSessionEntry->messages = pSessionEntry->messages->next;  //consume one
+    user_data = pSessionEntry->user_data;
+    call_func = true;
+  }
+  else {
+    user_data = NULL;
+    call_func = false;
+  }
+
+#ifdef TRIGGER_STAT_FLAG
+  if (call_func) {
+    if (!pMachineSessions->is_myself) {  //server
+      pSessionEntry->stat_start_time = CURRENT_NS();
+    }
+  }
+#endif
+  SESSION_UNLOCK(pMachineSessions, session_index);
+
+  pSockContext->thread_context->stats.enqueue_in_msg_count++;
+  pSockContext->thread_context->stats.enqueue_in_msg_bytes +=
+    MSG_HEADER_LENGTH + data_len;
+
+  // pMessage is only touched again when it was popped above; a message left
+  // in the queue is owned by the queue from here on
+  if (call_func) {
+    cluster_msg_deal_func(session, user_data, pMessage->func_id,
+        pMessage->blocks, pMessage->data_len);
+
+    release_in_message(pSockContext, pMessage);
+  }
+
+  return 0;
+}
+
+static void set_session_stat(SessionRecords *pSessionRecords,
+ const SessionStat *pNewtat, SessionStat *pOldStat)
+{
+ if (pNewtat->create_total_count != pOldStat->create_total_count) {
+ pOldStat->create_total_count = pNewtat->create_total_count;
+ RecDataSetFromInk64(RECD_INT, &pSessionRecords->create_total_count->data,
+ pNewtat->create_total_count);
+ }
+ if (pNewtat->create_success_count != pOldStat->create_success_count) {
+ pOldStat->create_success_count = pNewtat->create_success_count;
+ RecDataSetFromInk64(RECD_INT, &pSessionRecords->create_success_count->data,
+ pNewtat->create_success_count);
+ }
+ if (pNewtat->create_retry_times != pOldStat->create_retry_times) {
+ pOldStat->create_retry_times = pNewtat->create_retry_times;
+ RecDataSetFromInk64(RECD_INT, &pSessionRecords->create_retry_times->data,
+ pNewtat->create_retry_times);
+ }
+ if (pNewtat->close_total_count != pOldStat->close_total_count) {
+ pOldStat->close_total_count = pNewtat->close_total_count;
+ RecDataSetFromInk64(RECD_INT, &pSessionRecords->close_total_count->data,
+ pNewtat->close_total_count);
+ }
+ if (pNewtat->close_success_count != pOldStat->close_success_count) {
+ pOldStat->close_success_count = pNewtat->close_success_count;
+ RecDataSetFromInk64(RECD_INT, &pSessionRecords->close_success_count->data,
+ pNewtat->close_success_count);
+ }
+ if (pNewtat->session_miss_count != pOldStat->session_miss_count) {
+ pOldStat->session_miss_count = pNewtat->session_miss_count;
+ RecDataSetFromInk64(RECD_INT, &pSessionRecords->session_miss_count->data,
+ pNewtat->session_miss_count);
+ }
+ if (pNewtat->session_occupied_count != pOldStat->session_occupied_count) {
+ pOldStat->session_occupied_count = pNewtat->session_occupied_count;
+ RecDataSetFromInk64(RECD_INT, &pSessionRecords->session_occupied_count->data,
+ pNewtat->session_occupied_count);
+ }
+}
+
+/**
+ * Register the session stats "<prefix>.<counter>" and store the resulting
+ * records.
+ *
+ * Names are built with snprintf() so that an over-long prefix is truncated
+ * instead of overflowing the name buffer.
+ *
+ * @param pSessionRecords receives the registered records
+ * @param prefix common name prefix of the stats
+ */
+static void init_session_stat(SessionRecords *pSessionRecords, const char *prefix)
+{
+  char name[256];
+  RecData data_default;
+  memset(&data_default, 0, sizeof(RecData));
+
+  snprintf(name, sizeof(name), "%s.create_total_count", prefix);
+  pSessionRecords->create_total_count = RecRegisterStat(RECT_PROCESS,
+      name, RECD_INT, data_default, RECP_NON_PERSISTENT);
+
+  snprintf(name, sizeof(name), "%s.create_success_count", prefix);
+  pSessionRecords->create_success_count = RecRegisterStat(RECT_PROCESS,
+      name, RECD_INT, data_default, RECP_NON_PERSISTENT);
+
+  snprintf(name, sizeof(name), "%s.create_retry_times", prefix);
+  pSessionRecords->create_retry_times = RecRegisterStat(RECT_PROCESS,
+      name, RECD_INT, data_default, RECP_NON_PERSISTENT);
+
+  snprintf(name, sizeof(name), "%s.close_total_count", prefix);
+  pSessionRecords->close_total_count = RecRegisterStat(RECT_PROCESS,
+      name, RECD_INT, data_default, RECP_NON_PERSISTENT);
+
+  snprintf(name, sizeof(name), "%s.close_success_count", prefix);
+  pSessionRecords->close_success_count = RecRegisterStat(RECT_PROCESS,
+      name, RECD_INT, data_default, RECP_NON_PERSISTENT);
+
+  snprintf(name, sizeof(name), "%s.miss_count", prefix);
+  pSessionRecords->session_miss_count = RecRegisterStat(RECT_PROCESS,
+      name, RECD_INT, data_default, RECP_NON_PERSISTENT);
+
+  snprintf(name, sizeof(name), "%s.occupied_count", prefix);
+  pSessionRecords->session_occupied_count = RecRegisterStat(RECT_PROCESS,
+      name, RECD_INT, data_default, RECP_NON_PERSISTENT);
+}
+
+
+/**
+ * Refresh the server and client session stat records.
+ *
+ * Server-side counters are summed over the session tables of all live remote
+ * machines; create_success_count and create_retry_times are reported equal to
+ * create_total_count on the server side. Client-side counters come from the
+ * local machine's table.
+ */
+void log_session_stat()
+{
+  static SessionStat serverOldStat = {0, 0, 0, 0, 0, 0, 0};
+  static SessionStat clientOldStat = {0, 0, 0, 0, 0, 0, 0};
+  SessionStat serverSessionStat;
+  MachineSessions *pRemote;
+  ClusterMachine *pMachine;
+  ClusterMachine *pMachineEnd;
+  int machine_id;
+
+  serverSessionStat.create_total_count = 0;
+  serverSessionStat.create_success_count = 0;
+  serverSessionStat.create_retry_times = 0;
+  serverSessionStat.close_total_count = 0;
+  serverSessionStat.close_success_count = 0;
+  serverSessionStat.session_miss_count = 0;
+  serverSessionStat.session_occupied_count = 0;
+
+  pMachineEnd = cluster_machines + cluster_machine_count;
+  for (pMachine=cluster_machines; pMachine<pMachineEnd; pMachine++) {
+    machine_id = get_session_machine_index(pMachine->ip);
+    // skip machines without a slot, dead machines and the local machine
+    if (machine_id < 0 || pMachine->dead || machine_id == my_machine_id) {
+      continue;
+    }
+
+    pRemote = all_sessions + machine_id;
+    serverSessionStat.create_total_count +=
+      pRemote->session_stat.create_total_count;
+    serverSessionStat.close_total_count +=
+      pRemote->session_stat.close_total_count;
+    serverSessionStat.close_success_count +=
+      pRemote->session_stat.close_success_count;
+    serverSessionStat.session_miss_count +=
+      pRemote->session_stat.session_miss_count;
+    serverSessionStat.session_occupied_count +=
+      pRemote->session_stat.session_occupied_count;
+  }
+
+  serverSessionStat.create_success_count = serverSessionStat.create_total_count;
+  serverSessionStat.create_retry_times = serverSessionStat.create_total_count;
+
+  set_session_stat(&server_session_records, &serverSessionStat, &serverOldStat);
+  set_session_stat(&client_session_records, (const SessionStat *)
+      &all_sessions[my_machine_id].session_stat, &clientOldStat);
+}
+
+#ifdef TRIGGER_STAT_FLAG
+/**
+ * Log the trigger statistics and reset them.
+ *
+ * For every entry of cluster_machines that maps to a session machine index,
+ * is not flagged dead and is not the local machine, one line is logged with
+ * the trigger message count and the average time used; the entry's
+ * trigger_stat is then zeroed. Afterwards a "SERVER" line with the
+ * aggregate of those peers and a "CLIENT" line for the local machine's own
+ * entry are logged, and the local trigger_stat is zeroed as well.
+ *
+ * Averages are divided by 1000 and printed as "us", which suggests
+ * time_used is accumulated in nanoseconds -- TODO confirm.
+ *
+ * NOTE(review): the counters are read and then reset with plain stores;
+ * updates made by other threads in between would be lost -- confirm
+ * whether that is acceptable for these debug statistics.
+ */
+void log_trigger_stat()
+{
+  ClusterMachine *pMachine;
+  ClusterMachine *pMachineEnd;
+  MachineSessions *pServerSessions;
+  MachineSessions *pClientSessions;
+  MsgTimeUsed serverTimeUsed;
+  int machine_id;
+  // 64-bit averages: time_used / count is int64_t and must not be narrowed
+  int64_t server_avg_time_used;
+  int64_t client_avg_time_used;
+
+  serverTimeUsed.count = 0;
+  serverTimeUsed.time_used = 0;
+
+  pMachineEnd = cluster_machines + cluster_machine_count;
+  for (pMachine=cluster_machines; pMachine<pMachineEnd; pMachine++) {
+    if ((machine_id=get_session_machine_index(pMachine->ip)) < 0) {
+      continue;
+    }
+    if (pMachine->dead || machine_id == my_machine_id) {
+      continue;
+    }
+
+    pServerSessions = all_sessions + machine_id;
+
+    serverTimeUsed.count += pServerSessions->trigger_stat.count;
+    serverTimeUsed.time_used += pServerSessions->trigger_stat.time_used;
+    if (pServerSessions->trigger_stat.count > 0) {
+      server_avg_time_used = pServerSessions->trigger_stat.time_used /
+        pServerSessions->trigger_stat.count;
+    }
+    else {
+      server_avg_time_used = 0;
+    }
+    // C++11 requires whitespace between a string literal and PRId64
+    Note("%s:%d trigger msg => %" PRId64 ", avg time used => %" PRId64 " us",
+        pMachine->hostname, pMachine->cluster_port,
+        pServerSessions->trigger_stat.count,
+        server_avg_time_used / 1000);
+
+    pServerSessions->trigger_stat.count = 0;
+    pServerSessions->trigger_stat.time_used = 0;
+  }
+
+  if (serverTimeUsed.count > 0) {
+    server_avg_time_used = serverTimeUsed.time_used / serverTimeUsed.count;
+  }
+  else {
+    server_avg_time_used = 0;
+  }
+  Note("SERVER: trigger msg => %" PRId64 ", avg time used => %" PRId64 " us",
+      serverTimeUsed.count, server_avg_time_used / 1000);
+
+  pClientSessions = all_sessions + my_machine_id;
+  if (pClientSessions->trigger_stat.count > 0) {
+    client_avg_time_used = pClientSessions->trigger_stat.time_used /
+      pClientSessions->trigger_stat.count;
+  }
+  else {
+    client_avg_time_used = 0;
+  }
+  Note("CLIENT: trigger msg => %" PRId64 ", avg time used => %" PRId64 " us\n",
+      pClientSessions->trigger_stat.count, client_avg_time_used / 1000);
+
+  pClientSessions->trigger_stat.count = 0;
+  pClientSessions->trigger_stat.time_used = 0;
+}
+#endif
+
+#ifdef MSG_TIME_STAT_FLAG
+/**
+ * Log the message timing statistics and reset them.
+ *
+ * For every entry of cluster_machines that maps to a session machine index,
+ * is not flagged dead and is not the local machine, one line is logged with
+ * the msg_stat and msg_send counts and average times; both counters of that
+ * entry are then zeroed. Afterwards a "SERVER" line with the aggregate of
+ * those peers and a "CLIENT" line for the local machine's own entry are
+ * logged, and the local counters are zeroed as well.
+ *
+ * Averages are divided by 1000 and printed as "us", which suggests
+ * time_used is accumulated in nanoseconds -- TODO confirm.
+ *
+ * NOTE(review): the counters are read and then reset with plain stores;
+ * updates made by other threads in between would be lost -- confirm
+ * whether that is acceptable for these debug statistics.
+ */
+void log_msg_time_stat()
+{
+  ClusterMachine *pMachine;
+  ClusterMachine *pMachineEnd;
+  MachineSessions *pServerSessions;
+  MachineSessions *pClientSessions;
+  MsgTimeUsed serverTimeUsed;
+  MsgTimeUsed sendTimeUsed;
+  int machine_id;
+  // 64-bit averages: time_used / count is int64_t and must not be narrowed
+  int64_t server_avg_time_used;
+  int64_t client_avg_time_used;
+  int64_t send_avg_time_used;
+
+  serverTimeUsed.count = 0;
+  serverTimeUsed.time_used = 0;
+  sendTimeUsed.count = 0;
+  sendTimeUsed.time_used = 0;
+
+  pMachineEnd = cluster_machines + cluster_machine_count;
+  for (pMachine=cluster_machines; pMachine<pMachineEnd; pMachine++) {
+    if ((machine_id=get_session_machine_index(pMachine->ip)) < 0) {
+      continue;
+    }
+    if (pMachine->dead || machine_id == my_machine_id) {
+      continue;
+    }
+
+    pServerSessions = all_sessions + machine_id;
+    serverTimeUsed.count += pServerSessions->msg_stat.count;
+    serverTimeUsed.time_used += pServerSessions->msg_stat.time_used;
+    if (pServerSessions->msg_stat.count > 0) {
+      server_avg_time_used = pServerSessions->msg_stat.time_used /
+        pServerSessions->msg_stat.count;
+    }
+    else {
+      server_avg_time_used = 0;
+    }
+
+    sendTimeUsed.count += pServerSessions->msg_send.count;
+    sendTimeUsed.time_used += pServerSessions->msg_send.time_used;
+    if (pServerSessions->msg_send.count > 0) {
+      send_avg_time_used = pServerSessions->msg_send.time_used /
+        pServerSessions->msg_send.count;
+    }
+    else {
+      send_avg_time_used = 0;
+    }
+
+    // C++11 requires whitespace between a string literal and PRId64
+    Note("%s:%d msg count: %" PRId64 ", avg time used (recv start to send done): %" PRId64 " us, "
+        "send msg count: %" PRId64 ", send avg time: %" PRId64 " us",
+        pMachine->hostname, pMachine->cluster_port,
+        pServerSessions->msg_stat.count, server_avg_time_used / 1000,
+        pServerSessions->msg_send.count, send_avg_time_used / 1000);
+
+    pServerSessions->msg_stat.count = 0;
+    pServerSessions->msg_stat.time_used = 0;
+    pServerSessions->msg_send.count = 0;
+    pServerSessions->msg_send.time_used = 0;
+  }
+
+  if (serverTimeUsed.count > 0) {
+    server_avg_time_used = serverTimeUsed.time_used / serverTimeUsed.count;
+  }
+  else {
+    server_avg_time_used = 0;
+  }
+
+  if (sendTimeUsed.count > 0) {
+    send_avg_time_used = sendTimeUsed.time_used / sendTimeUsed.count;
+  }
+  else {
+    send_avg_time_used = 0;
+  }
+  Note("SERVER: msg count: %" PRId64 ", avg time used (recv start to send done): %" PRId64 " us, "
+      "send msg count: %" PRId64 ", send avg time: %" PRId64 " us",
+      serverTimeUsed.count, server_avg_time_used / 1000,
+      sendTimeUsed.count, send_avg_time_used / 1000);
+
+  pClientSessions = all_sessions + my_machine_id;
+  if (pClientSessions->msg_stat.count > 0) {
+    client_avg_time_used = pClientSessions->msg_stat.time_used /
+      pClientSessions->msg_stat.count;
+  }
+  else {
+    client_avg_time_used = 0;
+  }
+  if (pClientSessions->msg_send.count > 0) {
+    send_avg_time_used = pClientSessions->msg_send.time_used /
+      pClientSessions->msg_send.count;
+  }
+  else {
+    send_avg_time_used = 0;
+  }
+  Note("CLIENT: msg count: %" PRId64 ", avg time used (send start to recv done): %" PRId64 " us, "
+      "send msg count: %" PRId64 ", send avg time: %" PRId64 " us\n",
+      pClientSessions->msg_stat.count, client_avg_time_used / 1000,
+      pClientSessions->msg_send.count, send_avg_time_used / 1000);
+
+  pClientSessions->msg_stat.count = 0;
+  pClientSessions->msg_stat.time_used = 0;
+  pClientSessions->msg_send.count = 0;
+  pClientSessions->msg_send.time_used = 0;
+}
+#endif
+
http://git-wip-us.apache.org/repos/asf/trafficserver/blob/62504a9f/iocore/cluster/session.h
----------------------------------------------------------------------
diff --git a/iocore/cluster/session.h b/iocore/cluster/session.h
new file mode 100644
index 0000000..9dd2559
--- /dev/null
+++ b/iocore/cluster/session.h
@@ -0,0 +1,97 @@
+/** @file
+
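+ Declarations for per-machine cluster session state: the MachineSessions
+ struct, the SESSION_LOCK/SESSION_UNLOCK macros and the prototypes of the
+ session and statistics functions.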
+
+ @section license License
+
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ */
+
+#ifndef _SESSION_H_
+#define _SESSION_H_
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "types.h"
+#include "clusterinterface.h"
+
+typedef struct { //session state kept for one cluster machine
+ unsigned int ip; //machine IP as an integer (presumably IPv4 -- confirm)
+ bool init_done; //presumably set once this entry is initialized -- confirm against init_machine_sessions()
+ bool is_myself; //myself, the local host
+ SessionEntry *sessions; //session entries of this machine
+ ink_mutex *locks; //lock array; SESSION_LOCK/SESSION_UNLOCK use locks[session_index % session_lock_count_per_machine]
+ volatile SequenceType current_seq; //NOTE(review): looks like the sequence used for new session ids -- confirm
+ volatile SessionStat session_stat; //session counters, read by log_session_stat()
+
+#ifdef TRIGGER_STAT_FLAG
+ volatile MsgTimeUsed trigger_stat; //read and reset by log_trigger_stat()
+#endif
+
+#ifdef MSG_TIME_STAT_FLAG
+ volatile MsgTimeUsed msg_stat; //read and reset by log_msg_time_stat()
+ volatile MsgTimeUsed msg_send; //send count/time, read and reset by log_msg_time_stat()
+#endif
+
+} MachineSessions;
+
+// Acquire the mutex that guards session slot `session_index` of the given
+// MachineSessions: locks[session_index % session_lock_count_per_machine].
+// Both arguments are parenthesized so that expression arguments such as
+// `a + b` are reduced modulo the lock count as a whole.
+#define SESSION_LOCK(pMachineSessions, session_index) \
+  ink_mutex_acquire((pMachineSessions)->locks + (session_index) % \
+      session_lock_count_per_machine)
+
+// Release the mutex taken by SESSION_LOCK for the same session_index.
+#define SESSION_UNLOCK(pMachineSessions, session_index) \
+  ink_mutex_release((pMachineSessions)->locks + (session_index) % \
+      session_lock_count_per_machine)
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+int session_init();
+int init_machine_sessions(ClusterMachine *machine, const bool bMyself);
+
+int get_session_for_send(const SessionId *session,
+ MachineSessions **ppMachineSessions, SessionEntry **sessionEntry);
+int get_response_session(const MsgHeader *pHeader,
+ MachineSessions **ppMachineSessions, SessionEntry **sessionEntry,
+ SocketContext *pSocketContext, bool *call_func, void **user_data);
+
+int notify_connection_closed(SocketContext *pSockContext);
+
+int push_in_message(const SessionId session,
+ MachineSessions *pMachineSessions, SessionEntry *pSessionEntry,
+ const int func_id, IOBufferBlock *blocks, const int data_len);
+
+void log_session_stat(); //sums the session counters of the live remote machines and passes server/client stats to set_session_stat()
+
+#ifdef TRIGGER_STAT_FLAG
+void log_trigger_stat(); //logs per-machine, SERVER and CLIENT trigger counts/average times, then zeroes them
+#endif
+
+#ifdef MSG_TIME_STAT_FLAG
+int get_response_session_internal(const MsgHeader *pHeader,
+ MachineSessions **ppMachineSessions, SessionEntry **sessionEntry); //only declared when MSG_TIME_STAT_FLAG is defined
+void log_msg_time_stat(); //logs per-machine, SERVER and CLIENT message/send counts and average times, then zeroes them
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
+
http://git-wip-us.apache.org/repos/asf/trafficserver/blob/62504a9f/iocore/cluster/types.h
----------------------------------------------------------------------
diff --git a/iocore/cluster/types.h b/iocore/cluster/types.h
new file mode 100644
index 0000000..e11b00c
--- /dev/null
+++ b/iocore/cluster/types.h
@@ -0,0 +1,235 @@
+/** @file
+
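+ Common constants, macros and data structures for the cluster messaging
+ code: message header, in/out message entries, session entry, message
+ queue, socket context, socket statistics and worker thread context.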
+
+ @section license License
+
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ */
+
+#ifndef _CLUSTER_TYPES_H_
+#define _CLUSTER_TYPES_H_
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "clusterinterface.h"
+#include "libts.h"
+
+#define IP_ADDRESS_SIZE 16
+
+//#define USE_MULTI_ALLOCATOR 1 //when defined, SocketContext carries its own out/in message allocators
+#define CHECK_MAGIC_NUMBER 1 //when defined, MsgHeader holds a 16-bit magic plus a 16-bit msg_seq instead of a 32-bit msg_seq
+
+#define PRIORITY_COUNT 3 //priority queue count
+
+//statistic macro defines
+//#define TRIGGER_STAT_FLAG 1 //trigger statistic flag
+//#define MSG_TIME_STAT_FLAG 1 //data statistic flag
+
+#define MSG_HEADER_LENGTH ((int)sizeof(MsgHeader))
+#define MAGIC_NUMBER 0x3308
+#define MAX_MSG_LENGTH (4 * 1024 * 1024)
+
+#define MAX_MACHINE_COUNT 255 //IMPORTANT: can't be 256!!
+
+//combine multiple messages into one writev call
+#define WRITEV_ARRAY_SIZE 128
+#define WRITEV_ITEM_ONCE (WRITEV_ARRAY_SIZE / 2)
+#define WRITE_MAX_COMBINE_BYTES (64 * 1024)
+
+#define CONNECT_TYPE_CLIENT 'C' //connect by me, client
+#define CONNECT_TYPE_SERVER 'S' //connect by peer, server
+
+#define DATA_TYPE_BUFFER 'B' //char buffer
+#define DATA_TYPE_OBJECT 'O' //IOBufferBlock pointer
+
+#define ALIGN_BYTES 8
+#define BYTE_ALIGN(x,l) (((x) + ((l) - 1)) & ~((l) - 1)) //round x up to a multiple of l; l must be a power of two
+#define BYTE_ALIGN8(x) BYTE_ALIGN(x, ALIGN_BYTES)
+
+#define IS_SESSION_EMPTY(session_id) \
+ ((session_id).ids[0] == 0 && (session_id).ids[1] == 0) //true when both ids[] words are zero
+
+#define IS_SESSION_EQUAL(session_id1, session_id2) \
+ ((session_id1).ids[0] == (session_id2).ids[0] && \
+ (session_id1).ids[1] == (session_id2).ids[1]) //true when both ids[] words match
+
+typedef struct msg_timeused { //accumulated message count and elapsed time
+ volatile int64_t count; //message count
+ volatile int64_t time_used; //total time used (the stat loggers divide averages by 1000 and print "us" -- presumably nanoseconds, confirm)
+} MsgTimeUsed;
+
+typedef struct session_stat { //session counters kept per machine and reported by log_session_stat()
+ volatile int64_t create_total_count; //create session total count
+ volatile int64_t create_success_count; //create session success count
+ volatile int64_t create_retry_times; //create session retry times
+ volatile int64_t close_total_count; //close session count
+ volatile int64_t close_success_count; //close session success count
+ volatile int64_t session_miss_count; //session miss count
+ volatile int64_t session_occupied_count; //session occupied count
+} SessionStat;
+
+typedef struct msg_header { //fixed-size header of a cluster message (MSG_HEADER_LENGTH bytes)
+#ifdef CHECK_MAGIC_NUMBER
+ short magic; //magic number
+ unsigned short msg_seq; //message sequence number, starting from 1
+#else
+ uint32_t msg_seq; //message sequence number, starting from 1
+#endif
+
+ int func_id; //function id, must be signed int
+ int data_len; //message body length
+ int aligned_data_len; //aligned body length
+ SessionId session_id; //session id
+} MsgHeader; //size must be a multiple of 8 bytes
+
+typedef struct in_msg_entry { //one received message, linked into an incoming message queue
+ int func_id; //function id
+ int data_len; //message body length
+ Ptr<IOBufferBlock> blocks; //presumably the message body data -- confirm
+ struct in_msg_entry *next; //for incoming message queue
+} InMessage;
+
+struct worker_thread_context;
+struct socket_context;
+
+typedef struct session_entry { //state of a single session
+ SessionId session_id;
+ void *user_data; //user data for callback
+ struct socket_context *sock_context; //socket context associated with this session
+ InMessage *messages; //incoming messages
+ int16_t response_events; //response events
+ uint16_t current_msg_seq; //current message sequence no
+ uint32_t version; //avoid CAS ABA
+ struct session_entry *next; //session chain, only for server session
+
+#ifdef TRIGGER_STAT_FLAG
+ volatile int64_t stat_start_time; //for message time used stat
+#endif
+
+#ifdef MSG_TIME_STAT_FLAG
+ volatile int64_t client_start_time; //send start time for client
+ volatile int64_t server_start_time; //recv done time for server
+ volatile int64_t send_start_time; //send start time, used for the send time statistic
+#endif
+
+} SessionEntry;
+
+//outgoing message waiting to be sent
+typedef struct out_msg_entry {
+ MsgHeader header;
+ char mini_buff[MINI_MESSAGE_SIZE]; //for mini message
+ Ptr<IOBufferBlock> blocks; //block data passed by caller
+
+ struct out_msg_entry *next; //for send queue
+ int bytes_sent; //important: including msg header
+ int data_type; //DATA_TYPE_BUFFER or DATA_TYPE_OBJECT
+ int64_t in_queue_time; //the time when pushed to the send queue
+} OutMessage;
+
+//out message queue: singly linked list of OutMessage with head/tail pointers
+typedef struct message_queue {
+ OutMessage *head;
+ OutMessage *tail;
+ ink_mutex lock; //presumably guards head/tail -- confirm against the queue code
+} MessageQueue;
+
+//receive-side buffer state for incoming messages
+typedef struct reader_manager {
+ Ptr<IOBufferData> buffer; //recv buffer
+ Ptr<IOBufferBlock> blocks; //recv blocks
+ char *msg_header; //current message start
+ char *current; //current pointer
+ char *buff_end; //buffer end
+ int recv_body_bytes; //received body bytes
+} ReaderManager;
+
+typedef struct socket_context { //per-connection state of one cluster socket
+ int sock; //socket fd
+ char padding[ALIGN_BYTES]; //padding buffer
+ struct reader_manager reader; //recv buffer
+ struct ClusterMachine *machine; //peer machine, point to global machine
+ struct worker_thread_context *thread_context; //the worker thread this socket belongs to
+ MessageQueue send_queues[PRIORITY_COUNT]; //send queues, one per priority
+
+ int queue_index; //index (base 0) of the send queue currently being processed
+ int connect_type; //CONNECT_TYPE_CLIENT or CONNECT_TYPE_SERVER
+ time_t connected_time; //connection established timestamp
+ uint32_t version; //avoid CAS ABA
+
+ int64_t next_write_time; //next time to send message
+
+ int ping_fail_count; //cluster ping fail counter
+ int64_t next_ping_time; //next time to send ping message
+ int64_t ping_start_time; //the start time of ping
+
+#ifdef USE_MULTI_ALLOCATOR
+ Allocator *out_msg_allocator; //for send
+ Allocator *in_msg_allocator; //for notify dealer
+#endif
+ struct socket_context *next; //for freelist
+} SocketContext;
+
+typedef struct socket_stats { //I/O counters; embedded in WorkerThreadContext as `stats`
+ int64_t send_msg_count; //send msg count
+ int64_t drop_msg_count; //dropped msg count when closing a socket
+ int64_t send_bytes;
+ int64_t drop_bytes;
+ int64_t call_writev_count;
+ int64_t send_retry_count;
+ int64_t send_delayed_time;
+
+ volatile int64_t push_msg_count; //push to send queue msg count
+ volatile int64_t push_msg_bytes; //push to send queue msg bytes
+
+ volatile int64_t fail_msg_count; //push to send queue fail msg count
+ volatile int64_t fail_msg_bytes; //push to send queue fail msg bytes
+
+ int64_t recv_msg_count; //recv msg count
+ int64_t enqueue_in_msg_count; //push into in msg queue
+ int64_t dequeue_in_msg_count; //pop from in msg queue
+ int64_t recv_bytes;
+ int64_t enqueue_in_msg_bytes; //push into in msg queue
+ int64_t dequeue_in_msg_bytes; //pop from in msg queue
+
+ int64_t call_read_count;
+ int64_t epoll_wait_count;
+ int64_t epoll_wait_time_used;
+ int64_t loop_usleep_count;
+ int64_t loop_usleep_time;
+
+ int64_t ping_total_count;
+ int64_t ping_success_count;
+ int64_t ping_time_used;
+} SocketStats;
+
+class EventPoll;
+
+typedef struct worker_thread_context //state owned by one worker thread
+{
+ EventPoll *ev_poll; //event poller of this thread
+ int alloc_size; //max count of epoll events
+ int thread_index; //my thread index
+ int active_sock_count; //presumably the number of entries used in active_sockets -- confirm
+ SocketStats stats; //I/O counters of this thread
+ ink_mutex lock;
+ SocketContext **active_sockets; //array of pointers to the active socket contexts
+} WorkerThreadContext;
+
+#endif
+
http://git-wip-us.apache.org/repos/asf/trafficserver/blob/62504a9f/iocore/eventsystem/I_Event.h
----------------------------------------------------------------------
diff --git a/iocore/eventsystem/I_Event.h b/iocore/eventsystem/I_Event.h
index 7a37ea0..2659131 100644
--- a/iocore/eventsystem/I_Event.h
+++ b/iocore/eventsystem/I_Event.h
@@ -85,6 +85,7 @@
#define BLOCK_CACHE_EVENT_EVENTS_START 4000
#define UTILS_EVENT_EVENTS_START 5000
#define CONGESTION_EVENT_EVENTS_START 5100
+#define CLUSTER_MSG_START 6000
#define INK_API_EVENT_EVENTS_START 60000
#define SRV_EVENT_EVENTS_START 62000
#define REMAP_EVENT_EVENTS_START 63000
http://git-wip-us.apache.org/repos/asf/trafficserver/blob/62504a9f/iocore/eventsystem/P_IOBuffer.h
----------------------------------------------------------------------
diff --git a/iocore/eventsystem/P_IOBuffer.h b/iocore/eventsystem/P_IOBuffer.h
index 0842aff..261aa1f 100644
--- a/iocore/eventsystem/P_IOBuffer.h
+++ b/iocore/eventsystem/P_IOBuffer.h
@@ -203,7 +203,7 @@ new_IOBufferData_internal(
void *b, int64_t size, int64_t asize_index)
{
(void) size;
- IOBufferData *d = THREAD_ALLOC(ioDataAllocator, this_ethread());
+ IOBufferData *d = ioDataAllocator.alloc();
d->_size_index = asize_index;
ink_assert(BUFFER_SIZE_INDEX_IS_CONSTANT(asize_index)
|| size <= d->block_size());
@@ -263,7 +263,7 @@ new_IOBufferData_internal(
#endif
int64_t size_index, AllocType type)
{
- IOBufferData *d = THREAD_ALLOC(ioDataAllocator, this_ethread());
+ IOBufferData *d = ioDataAllocator.alloc();
#ifdef TRACK_BUFFER_USER
d->_location = loc;
#endif
@@ -336,7 +336,7 @@ TS_INLINE void
IOBufferData::free()
{
dealloc();
- THREAD_FREE(this, ioDataAllocator, this_ethread());
+ ioDataAllocator.free(this);
}
//////////////////////////////////////////////////////////////////
@@ -352,7 +352,7 @@ new_IOBufferBlock_internal(
#endif
)
{
- IOBufferBlock *b = THREAD_ALLOC(ioBlockAllocator, this_ethread());
+ IOBufferBlock *b = ioBlockAllocator.alloc();
#ifdef TRACK_BUFFER_USER
b->_location = location;
#endif
@@ -366,7 +366,7 @@ new_IOBufferBlock_internal(
#endif
IOBufferData * d, int64_t len, int64_t offset)
{
- IOBufferBlock *b = THREAD_ALLOC(ioBlockAllocator, this_ethread());
+ IOBufferBlock *b = ioBlockAllocator.alloc();
#ifdef TRACK_BUFFER_USER
b->_location = location;
#endif
@@ -468,7 +468,7 @@ TS_INLINE void
IOBufferBlock::free()
{
dealloc();
- THREAD_FREE(this, ioBlockAllocator, this_ethread());
+ ioBlockAllocator.free(this);
}
TS_INLINE void
@@ -777,7 +777,7 @@ TS_INLINE MIOBuffer * new_MIOBuffer_internal(
#endif
int64_t size_index)
{
- MIOBuffer *b = THREAD_ALLOC(ioAllocator, this_ethread());
+ MIOBuffer *b = ioAllocator.alloc();
#ifdef TRACK_BUFFER_USER
b->_location = location;
#endif
@@ -790,7 +790,7 @@ free_MIOBuffer(MIOBuffer * mio)
{
mio->_writer = NULL;
mio->dealloc_all_readers();
- THREAD_FREE(mio, ioAllocator, this_ethread());
+ ioAllocator.free(mio);
}
TS_INLINE MIOBuffer * new_empty_MIOBuffer_internal(
@@ -799,7 +799,7 @@ TS_INLINE MIOBuffer * new_empty_MIOBuffer_internal(
#endif
int64_t size_index)
{
- MIOBuffer *b = THREAD_ALLOC(ioAllocator, this_ethread());
+ MIOBuffer *b = ioAllocator.alloc();
b->size_index = size_index;
#ifdef TRACK_BUFFER_USER
b->_location = location;
@@ -810,7 +810,7 @@ TS_INLINE MIOBuffer * new_empty_MIOBuffer_internal(
TS_INLINE void
free_empty_MIOBuffer(MIOBuffer * mio)
{
- THREAD_FREE(mio, ioAllocator, this_ethread());
+ ioAllocator.free(mio);
}
TS_INLINE IOBufferReader *
http://git-wip-us.apache.org/repos/asf/trafficserver/blob/62504a9f/mgmt/RecordsConfig.cc
----------------------------------------------------------------------
diff --git a/mgmt/RecordsConfig.cc b/mgmt/RecordsConfig.cc
index 4a73f19..7677471 100644
--- a/mgmt/RecordsConfig.cc
+++ b/mgmt/RecordsConfig.cc
@@ -814,6 +814,24 @@ RecordElement RecordsConfig[] = {
,
{RECT_CONFIG, "proxy.config.cluster.cluster_port", RECD_INT, "8086", RECU_RESTART_TS, RR_REQUIRED, RECC_NULL, NULL, RECA_NULL}
,
+ {RECT_CONFIG, "proxy.config.cluster.flow_ctrl.min_bps", RECD_INT, "804857600", RECU_RESTART_TS, RR_REQUIRED, RECC_NULL, NULL, RECA_NULL}
+ ,
+ {RECT_CONFIG, "proxy.config.cluster.flow_ctrl.max_bps", RECD_INT, "4194304000", RECU_RESTART_TS, RR_REQUIRED, RECC_NULL, NULL, RECA_NULL}
+ ,
+ {RECT_CONFIG, "proxy.config.cluster.flow_ctrl.min_send_wait_time", RECD_INT, "1000", RECU_RESTART_TS, RR_REQUIRED, RECC_NULL, NULL, RECA_NULL}
+ ,
+ {RECT_CONFIG, "proxy.config.cluster.flow_ctrl.max_send_wait_time", RECD_INT, "5000", RECU_RESTART_TS, RR_REQUIRED, RECC_NULL, NULL, RECA_NULL}
+ ,
+ {RECT_CONFIG, "proxy.config.cluster.flow_ctrl.min_loop_interval", RECD_INT, "0", RECU_RESTART_TS, RR_REQUIRED, RECC_NULL, NULL, RECA_NULL}
+ ,
+ {RECT_CONFIG, "proxy.config.cluster.flow_ctrl.max_loop_interval", RECD_INT, "1000", RECU_RESTART_TS, RR_REQUIRED, RECC_NULL, NULL, RECA_NULL}
+ ,
+ {RECT_CONFIG, "proxy.config.cluster.max_sessions_per_machine", RECD_INT, "1000000", RECU_RESTART_TS, RR_NULL, RECC_INT, "[1000-4000000]", RECA_NULL}
+ ,
+ {RECT_CONFIG, "proxy.config.cluster.session_locks_per_machine", RECD_INT, "10949", RECU_RESTART_TS, RR_NULL, RECC_INT, "[1-100000]", RECA_NULL}
+ ,
+ {RECT_CONFIG, "proxy.config.cluster.read_buffer_size", RECD_INT, "2097152", RECU_RESTART_TS, RR_NULL, RECC_INT, "[65536-2097152]", RECA_NULL}
+ ,
{RECT_CONFIG, "proxy.config.cluster.cluster_configuration", RECD_STRING, "cluster.config", RECU_NULL, RR_NULL, RECC_NULL, NULL, RECA_NULL}
,
{RECT_CONFIG, "proxy.config.cluster.ethernet_interface", RECD_STRING, TS_BUILD_DEFAULT_LOOPBACK_IFACE, RECU_RESTART_TS, RR_REQUIRED, RECC_STR, "^[^[:space:]]*$", RECA_NULL}
[3/6] refine the codes of cluster
Posted by we...@apache.org.
http://git-wip-us.apache.org/repos/asf/trafficserver/blob/62504a9f/iocore/cluster/P_ClusterInline.h
----------------------------------------------------------------------
diff --git a/iocore/cluster/P_ClusterInline.h b/iocore/cluster/P_ClusterInline.h
index c653956..1d26b3a 100644
--- a/iocore/cluster/P_ClusterInline.h
+++ b/iocore/cluster/P_ClusterInline.h
@@ -36,25 +36,30 @@ inline Action *
Cluster_lookup(Continuation * cont, CacheKey * key, CacheFragType frag_type, char *hostname, int host_len)
{
// Try to send remote, if not possible, handle locally
- Action *retAct;
- ClusterMachine *m = cluster_machine_at_depth(cache_hash(*key));
- if (m && !clusterProcessor.disable_remote_cluster_ops(m)) {
- CacheContinuation *cc = CacheContinuation::cacheContAllocator_alloc();
- cc->action = cont;
- cc->mutex = cont->mutex;
- retAct = CacheContinuation::do_remote_lookup(cont, key, cc, frag_type, hostname, host_len);
- if (retAct) {
- return retAct;
- } else {
- // not remote, do local lookup
- CacheContinuation::cacheContAllocator_free(cc);
- return (Action *) NULL;
- }
- } else {
- Action a;
- a = cont;
- return CacheContinuation::callback_failure(&a, CACHE_EVENT_LOOKUP_FAILED, 0);
- }
+// Action *retAct;
+// ClusterMachine *m = cluster_machine_at_depth(cache_hash(*key));
+// if (m && !clusterProcessor.disable_remote_cluster_ops(m)) {
+// CacheContinuation *cc = CacheContinuation::cacheContAllocator_alloc();
+// cc->action = cont;
+// cc->mutex = cont->mutex;
+// retAct = CacheContinuation::do_remote_lookup(cont, key, cc, frag_type, hostname, host_len);
+// if (retAct) {
+// return retAct;
+// } else {
+// // not remote, do local lookup
+// CacheContinuation::cacheContAllocator_free(cc);
+// return (Action *) NULL;
+// }
+// } else {
+// Action a;
+// a = cont;
+// return CacheContinuation::callback_failure(&a, CACHE_EVENT_LOOKUP_FAILED, 0);
+// }
+ (void) cont;
+ (void) key;
+ (void) frag_type;
+ (void) hostname;
+ (void) host_len;
return (Action *) NULL;
}
@@ -66,18 +71,24 @@ Cluster_read(ClusterMachine * owner_machine, int opcode,
time_t pin_in_cache, CacheFragType frag_type, char *hostname, int host_len)
{
(void) params;
- if (clusterProcessor.disable_remote_cluster_ops(owner_machine)) {
- Action a;
- a = cont;
- return CacheContinuation::callback_failure(&a, CACHE_EVENT_OPEN_READ_FAILED, 0);
+ ink_assert(cont);
+ ClusterSession session;
+ if (cluster_create_session(&session, owner_machine, NULL, 0)) {
+ cont->handleEvent(CACHE_EVENT_OPEN_READ_FAILED, NULL);
+ return ACTION_RESULT_DONE;
}
+
int vers = CacheOpMsg_long::protoToVersion(owner_machine->msg_proto_major);
+ CacheOpArgs_General readArgs;
+ Ptr<IOBufferData> d;
+
int flen;
int len = 0;
int cur_len;
int res = 0;
- char *msg;
+ char *msg = 0;
char *data;
+ Action *action = NULL;
if (vers == CacheOpMsg_long::CACHE_OP_LONG_MESSAGE_VERSION) {
if ((opcode == CACHE_OPEN_READ_LONG)
@@ -87,20 +98,21 @@ Cluster_read(ClusterMachine * owner_machine, int opcode,
const char *url_hostname;
int url_hlen;
- INK_MD5 url_only_md5;
+ INK_MD5 url_md5;
- Cache::generate_key(&url_only_md5, url, 0);
+ Cache::generate_key(&url_md5, url);
url_hostname = url->host_get(&url_hlen);
len += request->m_heap->marshal_length();
- len += params->marshal_length();
+ len += sizeof(CacheLookupHttpConfig) + params->marshal_length();
len += url_hlen;
if ((flen + len) > DEFAULT_MAX_BUFFER_SIZE) // Bound marshalled data
goto err_exit;
// Perform data Marshal operation
- msg = (char *) ALLOCA_DOUBLE(flen + len);
+ d = new_IOBufferData(iobuffer_size_to_index(flen + len));
+ msg = (char *) d->data();
data = msg + flen;
cur_len = len;
@@ -110,6 +122,13 @@ Cluster_read(ClusterMachine * owner_machine, int opcode,
}
data += res;
cur_len -= res;
+
+ if (cur_len < (int) sizeof(CacheLookupHttpConfig))
+ goto err_exit;
+ memcpy(data, params, sizeof(CacheLookupHttpConfig));
+ data += sizeof(CacheLookupHttpConfig);
+ cur_len -= sizeof(CacheLookupHttpConfig);
+
if ((res = params->marshal(data, cur_len)) < 0)
goto err_exit;
data += res;
@@ -117,37 +136,33 @@ Cluster_read(ClusterMachine * owner_machine, int opcode,
memcpy(data, url_hostname, url_hlen);
CacheOpArgs_General readArgs;
- readArgs.url_md5 = &url_only_md5;
+ readArgs.url_md5 = &url_md5;
readArgs.pin_in_cache = pin_in_cache;
readArgs.frag_type = frag_type;
- return CacheContinuation::do_op(cont, owner_machine, (void *) &readArgs,
- opcode, (char *) msg, (flen + len), -1, buf);
+
+ action = CacheContinuation::do_op(cont, session, (void *) &readArgs,
+ opcode, d, (flen + len), -1, buf);
} else {
// Build message if we have host data.
+ flen = op_to_sizeof_fixedlen_msg(opcode);
+ len = host_len;
- if (host_len) {
- // Determine length of data to Marshal
- flen = op_to_sizeof_fixedlen_msg(opcode);
- len = host_len;
-
- if ((flen + len) > DEFAULT_MAX_BUFFER_SIZE) // Bound marshalled data
- goto err_exit;
+ if ((flen + len) > DEFAULT_MAX_BUFFER_SIZE) // Bound marshalled data
+ goto err_exit;
- msg = (char *) ALLOCA_DOUBLE(flen + len);
- data = msg + flen;
+ d = new_IOBufferData(iobuffer_size_to_index(flen + len));
+ msg = (char *) d->data();
+ data = msg + flen;
+ if (host_len)
memcpy(data, hostname, host_len);
- } else {
- msg = 0;
- flen = 0;
- len = 0;
- }
- CacheOpArgs_General readArgs;
readArgs.url_md5 = key;
readArgs.frag_type = frag_type;
- return CacheContinuation::do_op(cont, owner_machine, (void *) &readArgs,
- opcode, (char *) msg, (flen + len), -1, buf);
+
+ action = CacheContinuation::do_op(cont, session, (void *) &readArgs,
+ opcode, d, (flen + len), -1, buf);
}
+ ink_assert(msg);
} else {
//////////////////////////////////////////////////////////////
@@ -155,10 +170,12 @@ Cluster_read(ClusterMachine * owner_machine, int opcode,
//////////////////////////////////////////////////////////////
ink_release_assert(!"CacheOpMsg_long [read] bad msg version");
}
+
+ if (action)
+ return action;
err_exit:
- Action a;
- a = cont;
- return CacheContinuation::callback_failure(&a, CACHE_EVENT_OPEN_READ_FAILED, 0);
+ cont->handleEvent(CACHE_EVENT_OPEN_READ_FAILED, NULL);
+ return ACTION_RESULT_DONE;
}
inline Action *
@@ -171,10 +188,11 @@ Cluster_write(Continuation * cont, int expected_size,
{
(void) key;
(void) request;
- if (clusterProcessor.disable_remote_cluster_ops(m)) {
- Action a;
- a = cont;
- return CacheContinuation::callback_failure(&a, CACHE_EVENT_OPEN_WRITE_FAILED, 0);
+ ClusterSession session;
+ ink_assert(cont);
+ if (cluster_create_session(&session, m, NULL, 0)) {
+ cont->handleEvent(CACHE_EVENT_OPEN_WRITE_FAILED, NULL);
+ return ACTION_RESULT_DONE;
}
char *msg = 0;
char *data = 0;
@@ -182,24 +200,22 @@ Cluster_write(Continuation * cont, int expected_size,
int len = 0;
int flen = 0;
int vers = CacheOpMsg_long::protoToVersion(m->msg_proto_major);
+ Ptr<IOBufferData> d;
switch (opcode) {
case CACHE_OPEN_WRITE:
{
// Build message if we have host data
- if (host_len) {
- // Determine length of data to Marshal
- flen = op_to_sizeof_fixedlen_msg(CACHE_OPEN_WRITE);
- len = host_len;
-
- if ((flen + len) > DEFAULT_MAX_BUFFER_SIZE) // Bound marshalled data
- goto err_exit;
-
- msg = (char *) ALLOCA_DOUBLE(flen + len);
- data = msg + flen;
+ len = host_len;
+ flen = op_to_sizeof_fixedlen_msg(CACHE_OPEN_WRITE);
+ if ((flen + len) > DEFAULT_MAX_BUFFER_SIZE) // Bound marshalled data
+ goto err_exit;
+ d = new_IOBufferData(iobuffer_size_to_index(flen + len));
+ msg = (char *) d->data();
+ data = msg + flen;
+ if (host_len)
memcpy(data, hostname, host_len);
- }
break;
}
case CACHE_OPEN_WRITE_LONG:
@@ -223,8 +239,9 @@ Cluster_write(Continuation * cont, int expected_size,
if ((flen + len) > DEFAULT_MAX_BUFFER_SIZE) // Bound marshalled data
goto err_exit;
+ d = new_IOBufferData(iobuffer_size_to_index(flen + len));
+ msg = (char *) d->data();
// Perform data Marshal operation
- msg = (char *) ALLOCA_DOUBLE(flen + len);
data = msg + flen;
int res = 0;
@@ -257,7 +274,9 @@ Cluster_write(Continuation * cont, int expected_size,
writeArgs.cfl_flags |= (old_info ? CFL_LOPENWRITE_HAVE_OLDINFO : 0);
writeArgs.cfl_flags |= (allow_multiple_writes ? CFL_ALLOW_MULTIPLE_WRITES : 0);
- return CacheContinuation::do_op(cont, m, (void *) &writeArgs, opcode, msg, flen + len, expected_size, buf);
+ Action *action = CacheContinuation::do_op(cont, session, (void *) &writeArgs, opcode, d, flen + len, expected_size, buf);
+ if (action)
+ return action;
} else {
//////////////////////////////////////////////////////////////
// Create the specified down rev version of this message
@@ -267,19 +286,21 @@ Cluster_write(Continuation * cont, int expected_size,
}
err_exit:
- Action a;
- a = cont;
- return CacheContinuation::callback_failure(&a, CACHE_EVENT_OPEN_WRITE_FAILED, 0);
+ cont->handleEvent(CACHE_EVENT_OPEN_WRITE_FAILED, NULL);
+ return ACTION_RESULT_DONE;
}
inline Action *
Cluster_link(ClusterMachine * m, Continuation * cont, CacheKey * from, CacheKey * to,
CacheFragType type, char *hostname, int host_len)
{
- if (clusterProcessor.disable_remote_cluster_ops(m)) {
- Action a;
- a = cont;
- return CacheContinuation::callback_failure(&a, CACHE_EVENT_LINK_FAILED, 0);
+ ClusterSession session;
+ Ptr<IOBufferData> d;
+ char *msg = NULL;
+
+ if (cluster_create_session(&session, m, NULL, 0)) {
+ cont->handleEvent(CACHE_EVENT_LINK_FAILED, NULL);
+ return ACTION_RESULT_DONE;
}
int vers = CacheOpMsg_short_2::protoToVersion(m->msg_proto_major);
@@ -293,7 +314,8 @@ Cluster_link(ClusterMachine * m, Continuation * cont, CacheKey * from, CacheKey
if ((flen + len) > DEFAULT_MAX_BUFFER_SIZE) // Bound marshalled data
goto err_exit;
- char *msg = (char *) ALLOCA_DOUBLE(flen + len);
+ d = new_IOBufferData(iobuffer_size_to_index(flen + len));
+ msg = (char *) d->data();
memcpy((msg + flen), hostname, host_len);
// Setup args for remote link
@@ -301,7 +323,9 @@ Cluster_link(ClusterMachine * m, Continuation * cont, CacheKey * from, CacheKey
linkArgs.from = from;
linkArgs.to = to;
linkArgs.frag_type = type;
- return CacheContinuation::do_op(cont, m, (void *) &linkArgs, CACHE_LINK, msg, (flen + len));
+ Action *action = CacheContinuation::do_op(cont, session, (void *) &linkArgs, CACHE_LINK, d, (flen + len));
+ if (action)
+ return action;
} else {
//////////////////////////////////////////////////////////////
// Create the specified down rev version of this message
@@ -311,18 +335,20 @@ Cluster_link(ClusterMachine * m, Continuation * cont, CacheKey * from, CacheKey
}
err_exit:
- Action a;
- a = cont;
- return CacheContinuation::callback_failure(&a, CACHE_EVENT_LINK_FAILED, 0);
+ cont->handleEvent(CACHE_EVENT_LINK_FAILED, NULL);
+ return ACTION_RESULT_DONE;
}
inline Action *
Cluster_deref(ClusterMachine * m, Continuation * cont, CacheKey * key, CacheFragType type, char *hostname, int host_len)
{
- if (clusterProcessor.disable_remote_cluster_ops(m)) {
- Action a;
- a = cont;
- return CacheContinuation::callback_failure(&a, CACHE_EVENT_DEREF_FAILED, 0);
+ ClusterSession session;
+ Ptr<IOBufferData> d;
+ char *msg = NULL;
+
+ if (cluster_create_session(&session, m, NULL, 0)) {
+ cont->handleEvent(CACHE_EVENT_DEREF_FAILED, NULL);
+ return ACTION_RESULT_DONE ;
}
int vers = CacheOpMsg_short::protoToVersion(m->msg_proto_major);
@@ -336,14 +362,17 @@ Cluster_deref(ClusterMachine * m, Continuation * cont, CacheKey * key, CacheFrag
if ((flen + len) > DEFAULT_MAX_BUFFER_SIZE) // Bound marshalled data
goto err_exit;
- char *msg = (char *) ALLOCA_DOUBLE(flen + len);
+ d = new_IOBufferData(iobuffer_size_to_index(flen + len));
+ msg = (char *) d->data();
memcpy((msg + flen), hostname, host_len);
// Setup args for remote deref
CacheOpArgs_Deref drefArgs;
drefArgs.md5 = key;
drefArgs.frag_type = type;
- return CacheContinuation::do_op(cont, m, (void *) &drefArgs, CACHE_DEREF, msg, (flen + len));
+ Action *action = CacheContinuation::do_op(cont, session, (void *) &drefArgs, CACHE_DEREF, d, (flen + len));
+ if (action)
+ return action;
} else {
//////////////////////////////////////////////////////////////
// Create the specified down rev version of this message
@@ -353,19 +382,22 @@ Cluster_deref(ClusterMachine * m, Continuation * cont, CacheKey * key, CacheFrag
}
err_exit:
- Action a;
- a = cont;
- return CacheContinuation::callback_failure(&a, CACHE_EVENT_DEREF_FAILED, 0);
+ cont->handleEvent(CACHE_EVENT_DEREF_FAILED, NULL);
+ return ACTION_RESULT_DONE ;
}
inline Action *
Cluster_remove(ClusterMachine * m, Continuation * cont, CacheKey * key,
bool rm_user_agents, bool rm_link, CacheFragType frag_type, char *hostname, int host_len)
{
- if (clusterProcessor.disable_remote_cluster_ops(m)) {
- Action a;
- a = cont;
- return CacheContinuation::callback_failure(&a, CACHE_EVENT_REMOVE_FAILED, 0);
+ ClusterSession session;
+ Ptr<IOBufferData> d;
+ char *msg = NULL;
+
+ if (cluster_create_session(&session, m, NULL, 0)) {
+ if (cont)
+ cont->handleEvent(CACHE_EVENT_REMOVE_FAILED, NULL);
+ return ACTION_RESULT_DONE;
}
int vers = CacheOpMsg_short::protoToVersion(m->msg_proto_major);
@@ -379,7 +411,8 @@ Cluster_remove(ClusterMachine * m, Continuation * cont, CacheKey * key,
if ((flen + len) > DEFAULT_MAX_BUFFER_SIZE) // Bound marshalled data
goto err_exit;
- char *msg = (char *) ALLOCA_DOUBLE(flen + len);
+ d = new_IOBufferData(iobuffer_size_to_index(flen + len));
+ msg = (char *) d->data();
memcpy((msg + flen), hostname, host_len);
// Setup args for remote update
@@ -388,7 +421,9 @@ Cluster_remove(ClusterMachine * m, Continuation * cont, CacheKey * key,
updateArgs.cfl_flags |= (rm_user_agents ? CFL_REMOVE_USER_AGENTS : 0);
updateArgs.cfl_flags |= (rm_link ? CFL_REMOVE_LINK : 0);
updateArgs.frag_type = frag_type;
- return CacheContinuation::do_op(cont, m, (void *) &updateArgs, CACHE_REMOVE, msg, (flen + len));
+ Action *action = CacheContinuation::do_op(cont, session, (void *) &updateArgs, CACHE_REMOVE, d, (flen + len));
+ if (action)
+ return action;
} else {
//////////////////////////////////////////////////////////////
// Create the specified down rev version of this message
@@ -398,9 +433,8 @@ Cluster_remove(ClusterMachine * m, Continuation * cont, CacheKey * key,
}
err_exit:
- Action a;
- a = cont;
- return CacheContinuation::callback_failure(&a, CACHE_EVENT_REMOVE_FAILED, 0);
+ if (cont)
+ cont->handleEvent(CACHE_EVENT_REMOVE_FAILED, NULL);
+ return ACTION_RESULT_DONE;
}
-
#endif /* __CLUSTERINLINE_H__ */
http://git-wip-us.apache.org/repos/asf/trafficserver/blob/62504a9f/iocore/cluster/clusterinterface.h
----------------------------------------------------------------------
diff --git a/iocore/cluster/clusterinterface.h b/iocore/cluster/clusterinterface.h
new file mode 100644
index 0000000..0f8e510
--- /dev/null
+++ b/iocore/cluster/clusterinterface.h
@@ -0,0 +1,104 @@
+/** @file
+
+ A brief file description
+
+ @section license License
+
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ */
+
+#ifndef _CLUSTER_INTERFACE_H
+#define _CLUSTER_INTERFACE_H
+
+struct ClusterMachine;
+class IOBufferData;
+class IOBufferBlock;
+
+#define CLUSTER_DEBUG_TAG "cluster_io"
+
+#define new_RecvBuffer(len) \
+ new_IOBufferData(iobuffer_size_to_index(len, MAX_BUFFER_SIZE_INDEX))
+
+#define CURRENT_TIME() (ink_get_hrtime() / HRTIME_SECOND)
+#define CURRENT_MS() (ink_get_hrtime() / HRTIME_MSECOND)
+#define CURRENT_NS() (ink_get_hrtime() / HRTIME_NSECOND)
+
+#define MINI_MESSAGE_SIZE 64 //use internal buffer to store the mini message
+
+#define FUNC_ID_CONNECTION_CLOSED_NOTIFY 6100 //connection closed
+#define FUNC_ID_CLUSTER_PING_REQUEST 6201
+#define FUNC_ID_CLUSTER_PING_RESPONSE 6202
+#define FUNC_ID_CLUSTER_HELLO_REQUEST 6203
+#define FUNC_ID_CLUSTER_HELLO_RESPONSE 6204
+
+#define RESPONSE_EVENT_NOTIFY_DEALER 1
+
+typedef int64_t SequenceType;
+
+/*
+ * Identifier of a cluster session. The same storage can be accessed either
+ * through the named members in `fields` or as two 64-bit words in `ids`;
+ * CLEAR_SESSION() zeroes both words of `ids`.
+ * NOTE(review): the two views are assumed to cover the same 16 bytes
+ * (no padding inside `fields`) -- confirm on the target ABI.
+ */
+typedef union {
+ struct {
+ uint32_t ip; //src ip addr
+ uint32_t timestamp; //session create time
+ SequenceType seq; //session sequence number
+ } fields;
+
+ uint64_t ids[2]; //session id, 0 for free entry
+} SessionId;
+
+typedef SessionId ClusterSession;
+
+typedef enum {
+ PRIORITY_HIGH = 0,
+ PRIORITY_MID,
+ PRIORITY_LOW,
+} MessagePriority;
+
+
+#define CLEAR_SESSION(session_id) \
+ (session_id).ids[0] = (session_id).ids[1] = 0
+
+typedef int (*machine_change_notify_func)(ClusterMachine * m);
+
+typedef void (*message_deal_func)(ClusterSession session, void *arg,
+ const int func_id, IOBufferBlock *blocks, const int data_len);
+
+/*
+typedef void (*message_deal_func)(ClusterSession session, void *arg,
+ const int func_id, void *data, int data_len);
+*/
+
+int cluster_global_init(message_deal_func deal_func,
+ machine_change_notify_func machine_change_notify);
+
+int cluster_create_session(ClusterSession *session,
+ const struct ClusterMachine *machine, void *arg, const int events);
+
+int cluster_bind_session(ClusterSession session, void *arg);
+
+int cluster_set_events(ClusterSession session, const int events);
+
+void *cluster_close_session(ClusterSession session);
+
+/*
+ * Meaning of the data pointer depends on data_len:
+ * data_len == -1: data is an IOBufferBlock *
+ * data_len >= 0: data is a char buffer
+ **/
+int cluster_send_message(ClusterSession session, const int func_id,
+ void *data, const int data_len, const MessagePriority priority);
+
+#endif
+
http://git-wip-us.apache.org/repos/asf/trafficserver/blob/62504a9f/iocore/cluster/connection.cc
----------------------------------------------------------------------
diff --git a/iocore/cluster/connection.cc b/iocore/cluster/connection.cc
new file mode 100644
index 0000000..856483c
--- /dev/null
+++ b/iocore/cluster/connection.cc
@@ -0,0 +1,1726 @@
+/** @file
+
+ A brief file description
+
+ @section license License
+
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ */
+
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <sys/stat.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <assert.h>
+#if defined(linux)
+#include <sys/prctl.h>
+#endif
+#include "Diags.h"
+#include "machine.h"
+#include "global.h"
+#include "nio.h"
+#include "message.h"
+#include "session.h"
+#include "P_Cluster.h"
+#include "ink_config.h"
+#include "EventPoll.h"
+#include "connection.h"
+
+/*
+ * States of a pending connection, stored in ConnectContext::state and
+ * dispatched on by connection_handler().
+ */
+typedef enum {
+ STATE_NOT_CONNECT = 0,
+ STATE_CONNECTING, //connect() in progress, waiting for the result
+ STATE_CONNECTED, //socket connected, hello exchange not started yet
+ STATE_SEND_DATA, //sending the hello message held in ConnectContext::buff
+ STATE_RECV_DATA //receiving the peer's hello message into ConnectContext::buff
+} ConnectState;
+
+/*
+ * Per-connection bookkeeping used while a socket is being connected and the
+ * hello messages are exchanged. `buff` holds exactly one MsgHeader followed
+ * by one HelloMessage and is used for both the outgoing and the incoming
+ * hello message.
+ */
+typedef struct connect_context {
+ SocketContext *pSockContext;
+ int64_t connect_start_time; //connect start time in ms
+ int64_t server_start_time; //recv data start time in ms
+ int reconnect_interval; //reconnect interval in ms
+ int connect_count; //already connect times
+ int send_bytes; //bytes of buff already written to the socket
+ int recv_bytes; //bytes already read from the socket into buff
+ int total_bytes; //number of bytes to send / receive in the current step
+ ConnectState state;
+ char buff[sizeof(MsgHeader) + sizeof(HelloMessage)];
+ bool is_accept; //true means server socket to accept
+ bool need_reconnect;
+ bool used; //cleared by remove_connection() when the entry is released
+ bool need_check_timeout;
+} ConnectContext;
+
+/*
+ * State of the connection (handshake) handling: the event poll object, the
+ * lock protecting the connection array and the machine socket free lists,
+ * and the storage for ConnectContext entries.
+ */
+struct connection_thread_context
+{
+ EventPoll *ev_poll;
+ int alloc_size; //number of entries allocated in the two arrays below
+ ink_mutex lock;
+
+ ConnectContext *connections_buffer; //memory pool for malloc
+ ConnectContext **connections; //existing connections
+ int connection_count; //current connection count
+};
+
+static struct connection_thread_context connect_thread_context;
+static SocketContext *socket_contexts_pool = NULL; //first element for accept
+
+SocketContextsByMachine *machine_sockets = NULL; //sockets by peer machine, [dest ip % MAX_MACHINE_COUNT]
+
+void *connect_worker_entrance(void *arg);
+
+/**
+ * Drop the ConnectContext bound to pSockContext from the array of pending
+ * connections and mark the entry as unused.
+ *
+ * @param pSockContext socket context whose pending connection is removed
+ * @param needLock true to acquire connect_thread_context.lock inside this
+ *        function; false when the caller takes care of locking itself
+ * @return 0 on success, ENOENT when no pending connection references
+ *         pSockContext
+ */
+static int remove_connection(SocketContext *pSockContext, const bool needLock)
+{
+    ConnectContext **ppConnection;
+    ConnectContext **ppConnectionEnd;
+    ConnectContext **ppNext;
+    int result;
+
+    if (needLock) {
+        ink_mutex_acquire(&connect_thread_context.lock);
+    }
+
+    ppConnectionEnd = connect_thread_context.connections +
+        connect_thread_context.connection_count;
+    for (ppConnection=connect_thread_context.connections;
+        ppConnection<ppConnectionEnd; ppConnection++)
+    {
+        if ((*ppConnection)->pSockContext == pSockContext) {
+            (*ppConnection)->used = false;
+            (*ppConnection)->pSockContext = NULL;
+            break;
+        }
+    }
+
+    if (ppConnection == ppConnectionEnd) {
+        Debug(CLUSTER_DEBUG_TAG, "file: " __FILE__ ", line: %d, "
+            "Can't found connection to release!", __LINE__);
+        result = ENOENT;
+    }
+    else {
+        // close the gap left by the removed entry
+        for (ppNext=ppConnection+1; ppNext<ppConnectionEnd; ppNext++) {
+            *(ppNext - 1) = *ppNext;
+        }
+        connect_thread_context.connection_count--;
+        result = 0;
+    }
+
+    // release only what was acquired above: the not-found path must not
+    // unlock a mutex this function did not take
+    if (needLock) {
+        ink_mutex_release(&connect_thread_context.lock);
+    }
+
+    return result;
+}
+
+/**
+ * Close the socket of pSockContext if it is open (sock >= 0) and reset the
+ * descriptor to -1. Does nothing for an already closed socket.
+ */
+static void close_connection(SocketContext *pSockContext)
+{
+    if (pSockContext->sock < 0) {
+        return;
+    }
+
+    Debug(CLUSTER_DEBUG_TAG, "file: " __FILE__ ", line: %d, "
+        "close connection #%d %s:%d",
+        __LINE__, pSockContext->sock,
+        pSockContext->machine->hostname,
+        pSockContext->machine->cluster_port);
+
+    close(pSockContext->sock);
+    pSockContext->sock = -1;
+}
+
+/**
+ * Close the socket of pSockContext. For a CONNECT_TYPE_SERVER context the
+ * pending connection entry is removed as well and the context is handed
+ * back through free_accept_sock_context().
+ *
+ * @param needLock passed through to remove_connection()
+ */
+static void release_connection(SocketContext *pSockContext,
+    const bool needLock)
+{
+    close_connection(pSockContext);
+
+    if (pSockContext->connect_type != CONNECT_TYPE_SERVER) {
+        return;
+    }
+
+    remove_connection(pSockContext, needLock);
+    free_accept_sock_context(pSockContext);
+}
+
+/**
+ * Find the slot of machine_sockets that is assigned to the given ip.
+ * Probing starts at ip % MAX_MACHINE_COUNT and walks forward with
+ * wrap-around.
+ *
+ * @return the slot index, or -1 when no slot holds this ip
+ */
+inline static int get_machine_index(const unsigned int ip)
+{
+    const int home = ip % MAX_MACHINE_COUNT;
+    int probe;
+    int slot;
+
+    // probe == 0 and probe == MAX_MACHINE_COUNT both map to the home slot
+    for (probe=0; probe<=MAX_MACHINE_COUNT; probe++) {
+        slot = (home + probe) % MAX_MACHINE_COUNT;
+        if (machine_sockets[slot].ip == ip) {
+            return slot;
+        }
+    }
+
+    return -1;
+}
+
+/**
+ * Find a free slot (ip == 0) of machine_sockets for the given ip.
+ * Probing starts at ip % MAX_MACHINE_COUNT and walks forward with
+ * wrap-around. The slot is only located here; it is not marked as taken.
+ *
+ * @return the slot index, or -1 (after logging a warning) when every slot
+ *         is occupied
+ */
+static int alloc_machine_index(const unsigned int ip)
+{
+    const int home = ip % MAX_MACHINE_COUNT;
+    int probe;
+    int slot;
+
+    // probe == 0 and probe == MAX_MACHINE_COUNT both map to the home slot
+    for (probe=0; probe<=MAX_MACHINE_COUNT; probe++) {
+        slot = (home + probe) % MAX_MACHINE_COUNT;
+        if (machine_sockets[slot].ip == 0) {
+            return slot;
+        }
+    }
+
+    Warning("file: " __FILE__ ", line: %d, "
+        "can't malloc slot for ip: %u.%u.%u.%u",
+        __LINE__, DOT_SEPARATED(ip));
+
+    return -1;
+}
+
+/**
+ * Build a hello message (MsgHeader followed by HelloMessage) in
+ * pConnectContext->buff and reset the send progress counter.
+ *
+ * @param pConnectContext connection whose buffer is filled
+ * @param func_id function id stored in the message header
+ */
+static void fill_send_buffer(ConnectContext *pConnectContext,
+    const int func_id)
+{
+    MsgHeader *pHeader = (MsgHeader *)pConnectContext->buff;
+    HelloMessage *pHello = (HelloMessage *)(pHeader + 1); //body follows the header
+
+#ifdef CHECK_MAGIC_NUMBER
+    pHeader->magic = MAGIC_NUMBER;
+#endif
+
+    pHeader->func_id = func_id;
+    pHeader->data_len = sizeof(HelloMessage);
+    pHeader->aligned_data_len = BYTE_ALIGN8(sizeof(HelloMessage));
+
+    pHeader->session_id.fields.ip = my_machine_ip;
+    pHeader->session_id.fields.timestamp = CURRENT_TIME();
+    pHeader->session_id.fields.seq = 0;
+    pHeader->msg_seq = 11111; //do not create session
+
+    pHello->major = CLUSTER_MAJOR_VERSION;
+    pHello->minor = CLUSTER_MINOR_VERSION;
+    pHello->min_major = MIN_CLUSTER_MAJOR_VERSION;
+    pHello->min_minor = MIN_CLUSTER_MINOR_VERSION;
+
+    pConnectContext->send_bytes = 0;
+}
+
+/**
+ * Validate a received hello message and record the peer's protocol version
+ * in pSockContext->machine.
+ *
+ * Checks (in order): the magic number when CHECK_MAGIC_NUMBER is defined,
+ * the body length, the function id expected for this side of the
+ * connection, and that the peer's major version range overlaps the local
+ * range [MIN_CLUSTER_MAJOR_VERSION, CLUSTER_MAJOR_VERSION].
+ *
+ * @param pSockContext socket the message was received on
+ * @param data buffer holding a MsgHeader followed by a HelloMessage
+ * @return 0 when the message is acceptable, EINVAL otherwise
+ */
+static int deal_hello_message(SocketContext *pSockContext, char *data)
+{
+    int proto_major = -1;
+    int proto_minor = -1;
+    uint32_t major;
+    int expect_func_id;
+    MsgHeader *pHeader;
+    HelloMessage *pHelloMessage;
+
+    pHeader = (MsgHeader *)data;
+#ifdef CHECK_MAGIC_NUMBER
+    if (pHeader->magic != MAGIC_NUMBER) {
+        Error("file: " __FILE__ ", line: %d, "
+            "magic number: %08x != %08x",
+            __LINE__, pHeader->magic, MAGIC_NUMBER);
+        return EINVAL;
+    }
+#endif
+
+    if (pHeader->data_len != sizeof(HelloMessage)) {
+        Error("file: " __FILE__ ", line: %d, "
+            "message length: %d != %d!", __LINE__,
+            pHeader->data_len, (int)sizeof(HelloMessage));
+        return EINVAL;
+    }
+
+    // a client waits for the response, a server for the request
+    if (pSockContext->connect_type == CONNECT_TYPE_CLIENT) {
+        expect_func_id = FUNC_ID_CLUSTER_HELLO_RESPONSE;
+    }
+    else {
+        expect_func_id = FUNC_ID_CLUSTER_HELLO_REQUEST;
+    }
+    if (pHeader->func_id != expect_func_id) {
+        Error("file: " __FILE__ ", line: %d, "
+            "invalid function id: %d != %d!", __LINE__,
+            pHeader->func_id, expect_func_id);
+        return EINVAL;
+    }
+    pHelloMessage = (HelloMessage *)(data + sizeof(MsgHeader));
+
+    /**
+     * Determine the message protocol major version by stepping down from
+     * the peer's current major version to its minimum level. Every level
+     * that is also inside the local range is recorded, so the lowest
+     * common major version is the one that remains.
+     * Derive the minor number as follows, if the resulting major equals
+     * the peer's major use the peer's minor number.
+     * Otherwise, minor number is zero.
+     *
+     * The walk starts no higher than CLUSTER_MAJOR_VERSION and stops
+     * before `major` would go below zero: `major` is unsigned, so with a
+     * peer min_major of 0 a plain `major >= min_major` test never becomes
+     * false and the loop would not terminate.
+     **/
+    major = pHelloMessage->major;
+    if (major > CLUSTER_MAJOR_VERSION) {
+        major = CLUSTER_MAJOR_VERSION;
+    }
+    while ((major >= pHelloMessage->min_major) &&
+        (major >= MIN_CLUSTER_MAJOR_VERSION))
+    {
+        proto_major = major;
+        if (major == 0) {
+            break;
+        }
+        --major;
+    }
+
+    if (proto_major > 0) {
+        /* Compute minor version */
+        if (proto_major == (int)pHelloMessage->major) {
+            proto_minor = pHelloMessage->minor;
+            if (proto_minor != CLUSTER_MINOR_VERSION) {
+                Warning("file: " __FILE__ ", line: %d, "
+                    "Different clustering minor versions (%d,%d) for "
+                    "node %u.%u.%u.%u, continuing", __LINE__,
+                    proto_minor, CLUSTER_MINOR_VERSION,
+                    DOT_SEPARATED(pSockContext->machine->ip));
+            }
+        } else {
+            proto_minor = 0;
+        }
+    }
+    else {
+        Error("file: " __FILE__ ", line: %d, "
+            "Bad cluster major version range (%d-%d) for "
+            "node %u.%u.%u.%u, close connection", __LINE__,
+            pHelloMessage->min_major, pHelloMessage->major,
+            DOT_SEPARATED(pSockContext->machine->ip));
+        return EINVAL;
+    }
+
+    // NOTE(review): the peer's advertised version is stored here, not the
+    // proto_major / proto_minor computed above -- confirm this is intended.
+    pSockContext->machine->msg_proto_major = pHelloMessage->major;
+    pSockContext->machine->msg_proto_minor = pHelloMessage->minor;
+    return 0;
+}
+
+
+/**
+ * Write the not yet sent part of pConnectContext->buff to the socket.
+ *
+ * @return 0 when all total_bytes have been sent, EAGAIN when more remains,
+ *         ECONNRESET when write() returned 0, otherwise the errno of the
+ *         failed write() (EAGAIN if errno was 0)
+ */
+static int do_send_data(ConnectContext *pConnectContext)
+{
+    SocketContext *pSockContext = pConnectContext->pSockContext;
+    int result;
+    int bytes;
+
+    bytes = write(pSockContext->sock,
+        pConnectContext->buff + pConnectContext->send_bytes,
+        pConnectContext->total_bytes - pConnectContext->send_bytes);
+    if (bytes > 0) {
+        pConnectContext->send_bytes += bytes;
+        if (pConnectContext->send_bytes == pConnectContext->total_bytes) {
+            return 0;
+        }
+        return EAGAIN;
+    }
+
+    if (bytes == 0) {
+        Debug(CLUSTER_DEBUG_TAG, "file: " __FILE__ ", line: %d, "
+            "%s:%d connection closed", __LINE__,
+            pSockContext->machine->hostname,
+            pSockContext->machine->cluster_port);
+        return ECONNRESET;
+    }
+
+    // write() failed
+    result = errno != 0 ? errno : EAGAIN;
+    if (result == EINTR) {
+        Debug(CLUSTER_DEBUG_TAG, "file: " __FILE__ ", line: %d, "
+            "write to %s:%d fail, errno: %d, error info: %s",
+            __LINE__, pSockContext->machine->hostname,
+            pSockContext->machine->cluster_port,
+            result, strerror(result));
+    }
+    else if (result != EAGAIN) {
+        Error("file: " __FILE__ ", line: %d, "
+            "write to %s:%d fail, errno: %d, error info: %s",
+            __LINE__, pSockContext->machine->hostname,
+            pSockContext->machine->cluster_port,
+            result, strerror(result));
+    }
+
+    return result;
+}
+
+/**
+ * Read from the socket into the not yet filled part of
+ * pConnectContext->buff.
+ *
+ * @return 0 when all total_bytes have been received, EAGAIN when more is
+ *         expected, ECONNRESET when read() returned 0, otherwise the errno
+ *         of the failed read() (EAGAIN if errno was 0)
+ */
+static int do_recv_data(ConnectContext *pConnectContext)
+{
+    SocketContext *pSockContext = pConnectContext->pSockContext;
+    int result;
+    int bytes;
+
+    bytes = read(pSockContext->sock,
+        pConnectContext->buff + pConnectContext->recv_bytes,
+        pConnectContext->total_bytes - pConnectContext->recv_bytes);
+    if (bytes > 0) {
+        pConnectContext->recv_bytes += bytes;
+        if (pConnectContext->recv_bytes == pConnectContext->total_bytes) {
+            return 0;
+        }
+        return EAGAIN;
+    }
+
+    if (bytes == 0) {
+        Debug(CLUSTER_DEBUG_TAG, "file: " __FILE__ ", line: %d, "
+            "%s:%d connection closed", __LINE__,
+            pSockContext->machine->hostname,
+            pSockContext->machine->cluster_port);
+        return ECONNRESET;
+    }
+
+    // read() failed
+    result = errno != 0 ? errno : EAGAIN;
+    if (result == EINTR) {
+        Debug(CLUSTER_DEBUG_TAG, "file: " __FILE__ ", line: %d, "
+            "read from %s:%d fail, errno: %d, error info: %s",
+            __LINE__, pSockContext->machine->hostname,
+            pSockContext->machine->cluster_port,
+            result, strerror(result));
+    }
+    else if (result != EAGAIN) {
+        Error("file: " __FILE__ ", line: %d, "
+            "read from %s:%d fail, errno: %d, error info: %s",
+            __LINE__, pSockContext->machine->hostname,
+            pSockContext->machine->cluster_port,
+            result, strerror(result));
+    }
+
+    return result;
+}
+
+/**
+ * Fetch the pending error of a socket through SO_ERROR.
+ *
+ * @return the SO_ERROR value (0 means no error); when getsockopt() itself
+ *         fails, its errno (EACCES if errno was 0)
+ */
+static int check_socket_status(int sock)
+{
+    int so_error;
+    socklen_t optlen = sizeof(so_error);
+
+    if (getsockopt(sock, SOL_SOCKET, SO_ERROR, &so_error, &optlen) >= 0) {
+        return so_error;
+    }
+
+    return errno != 0 ? errno : EACCES;
+}
+
+/**
+ * Advance the hello handshake of one pending connection by one step,
+ * depending on pConnectContext->state.
+ *
+ * While the handshake is still in progress the socket is (re)registered
+ * with the event poll for the next event and 0 is returned. Once the
+ * handshake has finished or failed, the socket is detached from the event
+ * poll and the pending connection entry is removed; on success the socket
+ * is handed to machine_add_connection(), on failure it is closed.
+ *
+ * @param pConnectContext the pending connection to process
+ * @param needLock passed through to remove_connection()
+ * @return 0 when the handshake continues or completed successfully,
+ *         otherwise an errno value
+ */
+static int connection_handler(ConnectContext *pConnectContext, const bool needLock)
+{
+ int result;
+ SocketContext *pSockContext;
+ int events;
+ bool bNew;
+
+ pSockContext = pConnectContext->pSockContext;
+ bNew = false;
+ events = 0;
+ result = 0;
+ switch (pConnectContext->state) {
+ case STATE_CONNECTING:
+ result = check_socket_status(pSockContext->sock);
+ if (result != 0) {
+ break;
+ }
+ pConnectContext->state = STATE_CONNECTED;
+ // no break: continue with the STATE_CONNECTED handling below
+ case STATE_CONNECTED:
+ if (pSockContext->connect_type == CONNECT_TYPE_CLIENT) {
+ // the client starts the exchange by sending the hello request
+ events = EVENTIO_WRITE;
+ pConnectContext->state = STATE_SEND_DATA;
+ fill_send_buffer(pConnectContext, FUNC_ID_CLUSTER_HELLO_REQUEST);
+ }
+ else { //server
+ events = EVENTIO_READ;
+ // bNew selects attach() instead of modify() further down
+ bNew = true;
+ pConnectContext->state = STATE_RECV_DATA;
+ pConnectContext->recv_bytes = 0;
+ pConnectContext->server_start_time = CURRENT_MS();
+ }
+
+ break;
+ case STATE_SEND_DATA:
+ // retry immediately when the write was interrupted
+ while ((result=do_send_data(pConnectContext)) == EINTR) {
+ }
+
+ if (result == EAGAIN) {
+ events = EVENTIO_WRITE;
+ break;
+ }
+ else if (result != 0) {
+ break;
+ }
+
+ //send data done
+ if (pSockContext->connect_type == CONNECT_TYPE_CLIENT) {
+ events = EVENTIO_READ;
+ pConnectContext->state = STATE_RECV_DATA;
+ pConnectContext->recv_bytes = 0;
+ pConnectContext->server_start_time = CURRENT_MS();
+ }
+ else { //server deal done
+ // events stays 0, so the completion code below runs with result == 0
+ }
+ break;
+ case STATE_RECV_DATA:
+ // retry immediately when the read was interrupted
+ while ((result=do_recv_data(pConnectContext)) == EINTR) {
+ }
+
+ if (result == EAGAIN) {
+ events = EVENTIO_READ;
+ break;
+ }
+ else if (result != 0) {
+ break;
+ }
+
+ //recv data done
+ result = deal_hello_message(pSockContext, pConnectContext->buff);
+ if (pSockContext->connect_type == CONNECT_TYPE_CLIENT) {
+ // client: nothing more to send; events stays 0 and the completion
+ // code below runs with the result of deal_hello_message()
+ }
+ else if (result == 0) {
+ // server: answer a valid request with the hello response
+ events = EVENTIO_WRITE;
+ pConnectContext->state = STATE_SEND_DATA;
+ fill_send_buffer(pConnectContext, FUNC_ID_CLUSTER_HELLO_RESPONSE);
+ }
+ break;
+ default:
+ result = EINVAL;
+ break;
+ }
+
+ // handshake still in progress: wait for the next event on this socket
+ if (events != 0) {
+ int ret;
+ if (bNew) {
+ ret = connect_thread_context.ev_poll->attach(pSockContext->sock,
+ events, pConnectContext);
+ }
+ else {
+ ret = connect_thread_context.ev_poll->modify(pSockContext->sock,
+ events, pConnectContext);
+ }
+ if (ret >= 0) {
+ return 0;
+ }
+
+ // registration failed: fall through to the teardown below
+ result = errno != 0 ? errno : ENOMEM;
+ Error("file: " __FILE__ ", line: %d, "
+ "event poll control fail, errno: %d, error info: %s",
+ __LINE__, result, strerror(result));
+ }
+
+ // handshake finished or failed: the socket leaves the connect event poll.
+ // Note that a failing detach() overwrites result, so it turns an otherwise
+ // successful handshake into a failure.
+ if (connect_thread_context.ev_poll->detach(pSockContext->sock) < 0) {
+ result = errno != 0 ? errno : ENOMEM;
+ Error("file: " __FILE__ ", line: %d, "
+ "event poll detach #%d fail, errno: %d, error info: %s",
+ __LINE__, pSockContext->sock,
+ result, strerror(result));
+ }
+
+ remove_connection(pSockContext, needLock);
+ if (result == 0) {
+ result = machine_add_connection(pSockContext);
+ if (result == 0) {
+ machine_up_notify(pSockContext->machine);
+ }
+ }
+
+ if (result != 0) {
+ close_connection(pSockContext);
+ // only accepted (server side) contexts are returned to their free list here
+ if (pSockContext->connect_type == CONNECT_TYPE_SERVER) {
+ free_accept_sock_context(pSockContext);
+ }
+ }
+
+ return result;
+}
+
+#ifdef USE_MULTI_ALLOCATOR
+/**
+ * Create the OutMessage / InMessage allocators of pSockContext on first
+ * use. The allocator names carry the position of the context inside
+ * socket_contexts_pool. Nothing is done when out_msg_allocator is already
+ * set.
+ */
+static void check_init_allocator(SocketContext *pSockContext)
+{
+    char name[64];
+    int index;
+
+    if (pSockContext->out_msg_allocator != NULL) {
+        return;
+    }
+
+    index = pSockContext - socket_contexts_pool;
+
+    sprintf(name, "OutMessage_%d", index);
+    pSockContext->out_msg_allocator = new Allocator(name,
+        sizeof(OutMessage), 512);
+
+    sprintf(name, "InMessage_%d", index);
+    pSockContext->in_msg_allocator = new Allocator(name,
+        sizeof(InMessage), 128);
+}
+#endif
+
+/**
+ * Take a SocketContext from the connect free list of the given machine.
+ * A machine slot is assigned to the ip first when it has none yet.
+ * Runs under connect_thread_context.lock.
+ *
+ * @return the context, or NULL when no machine slot is available or the
+ *         free list is empty
+ */
+static SocketContext *alloc_connect_sock_context(const unsigned int machine_ip)
+{
+    SocketContext *pSockContext = NULL;
+    int machine_id;
+
+    ink_mutex_acquire(&connect_thread_context.lock);
+
+    machine_id = get_machine_index(machine_ip);
+    if (machine_id < 0) {
+        machine_id = alloc_machine_index(machine_ip);
+        if (machine_id >= 0) {
+            machine_sockets[machine_id].ip = machine_ip;
+        }
+    }
+
+    if (machine_id >= 0) {
+        pSockContext = machine_sockets[machine_id].connect_free_list;
+        if (pSockContext != NULL) {
+            // pop the head of the free list
+            machine_sockets[machine_id].connect_free_list =
+                pSockContext->next;
+
+#ifdef USE_MULTI_ALLOCATOR
+            check_init_allocator(pSockContext);
+#endif
+        }
+    }
+
+    ink_mutex_release(&connect_thread_context.lock);
+
+    return pSockContext;
+}
+
+/**
+ * Push pSockContext back onto the connect free list of its machine.
+ * A warning is logged and nothing is changed when the machine has no slot.
+ *
+ * @param needLock true to take connect_thread_context.lock around the list
+ *        update
+ */
+static void free_connect_sock_context(SocketContext *pSockContext,
+    const bool needLock)
+{
+    const int machine_id = get_machine_index(pSockContext->machine->ip);
+
+    if (machine_id < 0) {
+        Warning("file: " __FILE__ ", line: %d, "
+            "can't get slot for ip: %u.%u.%u.%u",
+            __LINE__, DOT_SEPARATED(pSockContext->machine->ip));
+        return;
+    }
+
+    if (needLock) {
+        ink_mutex_acquire(&connect_thread_context.lock);
+    }
+
+    // the released context becomes the new head of the free list
+    pSockContext->next = machine_sockets[machine_id].connect_free_list;
+    machine_sockets[machine_id].connect_free_list = pSockContext;
+
+    if (needLock) {
+        ink_mutex_release(&connect_thread_context.lock);
+    }
+}
+
+/**
+ * Take a SocketContext from the accept free list of the given machine.
+ * A machine slot is assigned to the ip first when it has none yet.
+ * Runs under connect_thread_context.lock.
+ *
+ * @return the context, or NULL when no machine slot is available or the
+ *         free list is empty
+ */
+static SocketContext *alloc_accept_sock_context(const unsigned int machine_ip)
+{
+    SocketContext *pSockContext = NULL;
+    int machine_id;
+
+    ink_mutex_acquire(&connect_thread_context.lock);
+
+    machine_id = get_machine_index(machine_ip);
+    if (machine_id < 0) {
+        machine_id = alloc_machine_index(machine_ip);
+        if (machine_id >= 0) {
+            machine_sockets[machine_id].ip = machine_ip;
+        }
+    }
+
+    if (machine_id >= 0) {
+        pSockContext = machine_sockets[machine_id].accept_free_list;
+        if (pSockContext != NULL) {
+            // pop the head of the free list
+            machine_sockets[machine_id].accept_free_list =
+                pSockContext->next;
+
+#ifdef USE_MULTI_ALLOCATOR
+            check_init_allocator(pSockContext);
+#endif
+        }
+    }
+
+    ink_mutex_release(&connect_thread_context.lock);
+
+    return pSockContext;
+}
+
+/**
+ * Push pSockContext back onto the accept free list of its machine, under
+ * connect_thread_context.lock. A warning is logged and nothing is changed
+ * when the machine has no slot.
+ */
+void free_accept_sock_context(SocketContext *pSockContext)
+{
+    const int machine_id = get_machine_index(pSockContext->machine->ip);
+
+    if (machine_id < 0) {
+        Warning("file: " __FILE__ ", line: %d, "
+            "can't get slot for ip: %u.%u.%u.%u",
+            __LINE__, DOT_SEPARATED(pSockContext->machine->ip));
+        return;
+    }
+
+    ink_mutex_acquire(&connect_thread_context.lock);
+    // the released context becomes the new head of the free list
+    pSockContext->next = machine_sockets[machine_id].accept_free_list;
+    machine_sockets[machine_id].accept_free_list = pSockContext;
+    ink_mutex_release(&connect_thread_context.lock);
+}
+
+/**
+ * Allocate and zero the pool of SocketContext objects and initialize the
+ * send queue locks of every entry.
+ *
+ * The pool holds connections_per_machine * MAX_MACHINE_COUNT entries plus
+ * one extra entry at index 0.
+ *
+ * @param connections_per_machine number of contexts reserved per machine
+ * @param pool receives the allocated pool
+ * @return 0 on success, otherwise an errno value
+ */
+static int alloc_socket_contexts(const int connections_per_machine,
+    SocketContext **pool)
+{
+    int result;
+    int bytes;
+    int i;
+    int total_connections;
+
+    SocketContext *pSockContext;
+    SocketContext *pSockContextEnd;
+
+    total_connections = connections_per_machine * MAX_MACHINE_COUNT + 1;
+    bytes = sizeof(SocketContext) * total_connections;
+    *pool = (SocketContext *)malloc(bytes);
+    if (*pool == NULL) {
+        // capture errno before logging: the logging call may overwrite it
+        result = errno != 0 ? errno : ENOMEM;
+        Error("file: " __FILE__ ", line: %d, "
+            "malloc %d bytes fail, errno: %d, error info: %s",
+            __LINE__, bytes, result, strerror(result));
+        return result;
+    }
+    memset(*pool, 0, bytes);
+
+    pSockContextEnd = *pool + total_connections;
+    for (pSockContext=*pool; pSockContext<pSockContextEnd;
+        pSockContext++)
+    {
+        for (i=0; i<PRIORITY_COUNT; i++) {
+            if ((result=ink_mutex_init(&pSockContext->send_queues[i].lock,
+                "send_lock")) != 0)
+            {
+                return result;
+            }
+        }
+    }
+
+    return 0;
+}
+
+/**
+ * Allocate the global socket context pool and distribute its entries over
+ * the per-machine free lists: for every machine slot the first half of its
+ * contexts goes to the accept free list (CONNECT_TYPE_SERVER), the second
+ * half to the connect free list (CONNECT_TYPE_CLIENT). Worker thread
+ * contexts are assigned round-robin.
+ *
+ * @return 0 on success, otherwise the error of alloc_socket_contexts()
+ */
+static int init_socket_contexts()
+{
+    int result;
+    int half_connections_per_machine;
+    int machine_index;
+    int thread_index;
+    int k;
+    SocketContext *pSockContext;
+    SocketContext **ppFreeList;
+
+    result = alloc_socket_contexts(num_of_cluster_connections,
+        &socket_contexts_pool);
+    if (result != 0) {
+        return result;
+    }
+
+    half_connections_per_machine = num_of_cluster_connections / 2;
+    pSockContext = socket_contexts_pool + 1;  //0 for server accept
+    thread_index = 0;
+    for (machine_index=0; machine_index<MAX_MACHINE_COUNT; machine_index++) {
+        for (k=0; k<2*half_connections_per_machine; k++) {
+            if (k < half_connections_per_machine) {
+                pSockContext->connect_type = CONNECT_TYPE_SERVER;
+                ppFreeList = &machine_sockets[machine_index].accept_free_list;
+            }
+            else {
+                pSockContext->connect_type = CONNECT_TYPE_CLIENT;
+                ppFreeList = &machine_sockets[machine_index].connect_free_list;
+            }
+
+            // push onto the head of the selected free list
+            pSockContext->next = *ppFreeList;
+            *ppFreeList = pSockContext;
+
+            pSockContext->thread_context = cluster_worker_thread_contexts +
+                thread_index++ % num_of_cluster_threads;
+            pSockContext++;
+        }
+    }
+
+    return 0;
+}
+
+/**
+ * Set up the connection module: the per-machine socket table, the storage
+ * for pending connections, the event poll object, the connection lock and
+ * the socket context pool.
+ *
+ * Memory allocated before a failing step is not released here.
+ *
+ * @return 0 on success, otherwise an errno value
+ */
+int connection_init()
+{
+    int result;
+    int bytes;
+
+    bytes = sizeof(SocketContextsByMachine) * MAX_MACHINE_COUNT;
+    machine_sockets = (SocketContextsByMachine *)malloc(bytes);
+    if (machine_sockets == NULL) {
+        // capture errno before logging: the logging call may overwrite it
+        result = errno != 0 ? errno : ENOMEM;
+        Error("file: " __FILE__ ", line: %d, "
+            "malloc %d bytes fail, errno: %d, error info: %s",
+            __LINE__, bytes, result, strerror(result));
+        return result;
+    }
+    memset(machine_sockets, 0, bytes);
+
+    connect_thread_context.alloc_size = MAX_MACHINE_COUNT *
+        num_of_cluster_connections + 1;
+
+    bytes = sizeof(ConnectContext) * connect_thread_context.alloc_size;
+    connect_thread_context.connections_buffer = (ConnectContext *)malloc(bytes);
+    if (connect_thread_context.connections_buffer == NULL) {
+        result = errno != 0 ? errno : ENOMEM;
+        Error("file: " __FILE__ ", line: %d, "
+            "malloc %d bytes fail, errno: %d, error info: %s",
+            __LINE__, bytes, result, strerror(result));
+        return result;
+    }
+    memset(connect_thread_context.connections_buffer, 0, bytes);
+
+    bytes = sizeof(ConnectContext *) * connect_thread_context.alloc_size;
+    connect_thread_context.connections = (ConnectContext **)malloc(bytes);
+    if (connect_thread_context.connections == NULL) {
+        result = errno != 0 ? errno : ENOMEM;
+        Error("file: " __FILE__ ", line: %d, "
+            "malloc %d bytes fail, errno: %d, error info: %s",
+            __LINE__, bytes, result, strerror(result));
+        return result;
+    }
+    memset(connect_thread_context.connections, 0, bytes);
+    connect_thread_context.connection_count = 0;
+
+    connect_thread_context.ev_poll = new EventPoll(
+        connect_thread_context.alloc_size, 1000);
+    if (connect_thread_context.ev_poll == NULL) {
+        result = errno != 0 ? errno : ENOMEM;
+        Error("file: " __FILE__ ", line: %d, "
+            "new EventPoll fail, errno: %d, error info: %s",
+            __LINE__, result, strerror(result));
+        return result;
+    }
+
+    if ((result=ink_mutex_init(&connect_thread_context.lock,
+        "connection_lock")) != 0)
+    {
+        return result;
+    }
+
+    if ((result=init_socket_contexts()) != 0) {
+        return result;
+    }
+
+    return 0;
+}
+
+// Shutdown hook of the connection module. The body is empty, so nothing
+// is released or closed here.
+void connection_destroy()
+{
+}
+
+/**
+ * Look up the pending connection that is bound to pSockContext.
+ *
+ * The connection array is searched under connect_thread_context.lock and
+ * the matching entry is read while the lock is still held, because
+ * remove_connection() compacts the array under the same lock.
+ *
+ * @return the matching ConnectContext, or NULL when there is none
+ */
+static ConnectContext *find_connection(SocketContext *pSockContext)
+{
+    ConnectContext **ppConnection;
+    ConnectContext **ppConnectionEnd;
+    ConnectContext *pFound;
+
+    pFound = NULL;
+    ink_mutex_acquire(&connect_thread_context.lock);
+    ppConnectionEnd = connect_thread_context.connections +
+        connect_thread_context.connection_count;
+    for (ppConnection=connect_thread_context.connections;
+        ppConnection<ppConnectionEnd; ppConnection++)
+    {
+        if ((*ppConnection)->pSockContext == pSockContext) {
+            pFound = *ppConnection;
+            break;
+        }
+    }
+    ink_mutex_release(&connect_thread_context.lock);
+
+    return pFound;
+}
+
+/**
+ * Create a non-blocking TCP socket for the context and start connecting it
+ * to the peer machine named by pConnectContext->pSockContext->machine.
+ *
+ * connect_count is incremented and the state is set to STATE_CONNECTING on
+ * every call, even when socket creation fails.
+ *
+ * @param pConnectContext connect context to (re)connect
+ * @param needLock forwarded to remove_connection() and connection_handler()
+ * @return the result of connection_handler() when connect() completes
+ *         immediately; EINPROGRESS when the connect is pending and the
+ *         socket was attached to the event poll for EVENTIO_WRITE;
+ *         otherwise an errno-style error code
+ */
+static int do_connect(ConnectContext *pConnectContext, const bool needLock)
+{
+  int result;
+  struct sockaddr_in addr;
+  SocketContext *pSockContext;
+
+  pSockContext = pConnectContext->pSockContext;
+  pSockContext->sock = socket(AF_INET, SOCK_STREAM, 0);
+  pConnectContext->connect_count++;
+  pConnectContext->state = STATE_CONNECTING;
+  if (pSockContext->sock < 0) {
+    // save errno first: the logging call may change it
+    result = errno != 0 ? errno : EMFILE;
+    Debug(CLUSTER_DEBUG_TAG, "file: "__FILE__", line: %d, "
+        "socket create failed, errno: %d, error info: %s",
+        __LINE__, result, strerror(result));
+    return result;
+  }
+
+  if ((result=safe_nonblocking(pSockContext->sock)) != 0) {
+    close_connection(pSockContext);
+    return result;
+  }
+
+  if (safe_setsockopt(pSockContext->sock, IPPROTO_TCP, TCP_NODELAY,
+        SOCKOPT_ON, sizeof(int)) < 0)
+  {
+    result = errno != 0 ? errno : EINVAL;
+    Error("file: "__FILE__", line: %d, "
+        "setsockopt failed, errno: %d, error info: %s",
+        __LINE__, result, strerror(result));
+    // release the socket like the other failure paths do; otherwise the
+    // descriptor leaks and do_reconnect() skips this entry (sock >= 0)
+    close_connection(pSockContext);
+    return result;
+  }
+
+  // zero the whole structure so sin_zero does not carry stack garbage
+  memset(&addr, 0, sizeof(addr));
+  addr.sin_family = AF_INET;
+  addr.sin_port = htons(pSockContext->machine->cluster_port);
+  result = inet_aton(pSockContext->machine->hostname, &addr.sin_addr);
+  if (result == 0) {
+    // hostname is not a valid dotted-quad address: drop the connection
+    close_connection(pSockContext);
+    remove_connection(pSockContext, needLock);
+    return EINVAL;
+  }
+
+  pConnectContext->connect_start_time = CURRENT_MS(); //connect start time
+  if (connect(pSockContext->sock, (const struct sockaddr*)&addr,
+        sizeof(addr)) == 0) //success
+  {
+    pConnectContext->state = STATE_CONNECTED;
+    pConnectContext->need_check_timeout = true;
+    return connection_handler(pConnectContext, needLock);
+  }
+
+  result = errno != 0 ? errno : EINPROGRESS;
+  if (result != EINPROGRESS) {
+    Debug(CLUSTER_DEBUG_TAG, "file: "__FILE__", line: %d, "
+        "connect to %s:%d failed, errno: %d, error info: %s",
+        __LINE__, pSockContext->machine->hostname,
+        pSockContext->machine->cluster_port, result, strerror(result));
+    close_connection(pSockContext);
+    return result;
+  }
+
+  // connect is pending: wait for the socket to become writable
+  if (connect_thread_context.ev_poll->attach(pSockContext->sock,
+        EVENTIO_WRITE, pConnectContext) < 0)
+  {
+    result = errno != 0 ? errno : ENOMEM;
+    Error("file: " __FILE__ ", line: %d, "
+        "event poll attach fail, errno: %d, error info: %s",
+        __LINE__, result, strerror(result));
+    close_connection(pSockContext);
+    return result;
+  }
+
+  pConnectContext->need_check_timeout = true;
+  return result;
+}
+
+/**
+ * Take an unused ConnectContext from connections_buffer, reset its state and
+ * register it in the connections table.
+ *
+ * The context is fully reset before it is stored in the table, all under
+ * connect_thread_context.lock, so code that walks the table under the same
+ * lock never sees a half-initialized entry. pSockContext is NOT set here;
+ * the caller assigns it after this function returns.
+ *
+ * @return the allocated context, or NULL when the table is full or no free
+ *         slot is found in the buffer
+ */
+static ConnectContext *alloc_connect_context()
+{
+  ConnectContext *pConnectContext;
+  ConnectContext *pConnectEnd;
+
+  ink_mutex_acquire(&connect_thread_context.lock);
+  if (connect_thread_context.connection_count >=
+      connect_thread_context.alloc_size)
+  {
+    ink_mutex_release(&connect_thread_context.lock);
+    Debug(CLUSTER_DEBUG_TAG, "file: "__FILE__", line: %d, "
+        "exceeds max connection: %d",
+        __LINE__, connect_thread_context.alloc_size);
+    return NULL;
+  }
+
+  pConnectEnd = connect_thread_context.connections_buffer +
+    connect_thread_context.alloc_size;
+  for (pConnectContext=connect_thread_context.connections_buffer;
+      pConnectContext<pConnectEnd; pConnectContext++)
+  {
+    if (!pConnectContext->used) {
+      break;
+    }
+  }
+  if (pConnectContext == pConnectEnd) {
+    ink_mutex_release(&connect_thread_context.lock);
+    Debug(CLUSTER_DEBUG_TAG, "file: "__FILE__", line: %d, "
+        "alloc connection from buffer fail", __LINE__);
+    return NULL;
+  }
+
+  // reset every state field before the entry becomes visible in the table
+  pConnectContext->used = true;
+  pConnectContext->is_accept = false;  // a reused slot must not look like the listener
+  pConnectContext->need_reconnect = false;
+  pConnectContext->need_check_timeout = false;
+  pConnectContext->reconnect_interval = 100;
+  pConnectContext->connect_count = 0;
+  pConnectContext->state = STATE_NOT_CONNECT;
+  pConnectContext->send_bytes = 0;
+  pConnectContext->recv_bytes = 0;
+  pConnectContext->total_bytes = sizeof(MsgHeader) + sizeof(HelloMessage);
+
+  connect_thread_context.connections[connect_thread_context.
+    connection_count++] = pConnectContext;
+  ink_mutex_release(&connect_thread_context.lock);
+
+  return pConnectContext;
+}
+
+/**
+ * Clear the need_reconnect flag on every registered connection that targets
+ * the given machine.
+ *
+ * Entries without a socket context are skipped, mirroring the guard that
+ * do_reconnect() applies when it walks the same table.
+ *
+ * @param m machine whose connections should stop reconnecting
+ * @return 0 when at least one connection matched, ENOENT otherwise
+ */
+int machine_stop_reconnect(ClusterMachine *m)
+{
+  int count;
+  ConnectContext **ppConnection;
+  ConnectContext **ppConnectionEnd;
+
+  count = 0;
+  ink_mutex_acquire(&connect_thread_context.lock);
+  ppConnectionEnd = connect_thread_context.connections +
+    connect_thread_context.connection_count;
+  for (ppConnection=connect_thread_context.connections;
+      ppConnection<ppConnectionEnd; ppConnection++)
+  {
+    if (*ppConnection == NULL || (*ppConnection)->pSockContext == NULL) {
+      // not bound to a socket context (yet): nothing to compare against
+      continue;
+    }
+
+    if ((*ppConnection)->pSockContext->machine == m) {
+      count++;
+      (*ppConnection)->need_reconnect = false;
+    }
+  }
+  ink_mutex_release(&connect_thread_context.lock);
+
+  return count > 0 ? 0 : ENOENT;
+}
+
+/**
+ * Initialize the sessions of a machine and start its outgoing connections.
+ *
+ * One connect socket context is allocated per outgoing connection; the
+ * number of outgoing connections is half of num_of_cluster_connections.
+ * The result of make_connection() is not inspected.
+ *
+ * @param m the peer machine
+ * @return 0 on success, the init_machine_sessions() error, or ENOSPC when
+ *         no connect socket context can be allocated
+ */
+int machine_make_connections(ClusterMachine *m)
+{
+  const int init_result = init_machine_sessions(m, false);
+  if (init_result != 0) {
+    return init_result;
+  }
+
+  for (int left = num_of_cluster_connections / 2; left > 0; left--) {
+    SocketContext *pContext = alloc_connect_sock_context(m->ip);
+    if (pContext == NULL) {
+      return ENOSPC;
+    }
+
+    pContext->machine = m;
+    make_connection(pContext);
+  }
+
+  return 0;
+}
+
+/**
+ * Register a socket context for connecting and start the first attempt.
+ *
+ * @param pSockContext socket context to connect; its machine must be set
+ * @return EEXIST when the socket context is already registered, ENOSPC when
+ *         no connect context is available, otherwise the do_connect() result
+ */
+int make_connection(SocketContext *pSockContext)
+{
+  /*
+  Debug(CLUSTER_DEBUG_TAG, "file: "__FILE__", line: %d, "
+      "alloc connection, current count: %d", __LINE__,
+      connect_thread_context.connection_count);
+  */
+
+  if (find_connection(pSockContext) != NULL) {
+    Debug(CLUSTER_DEBUG_TAG, "file: "__FILE__", line: %d, "
+        "connection: %p already exist!", __LINE__, pSockContext);
+    return EEXIST;
+  }
+
+  ConnectContext *const pNewContext = alloc_connect_context();
+  if (pNewContext != NULL) {
+    pNewContext->need_reconnect = true;
+    pNewContext->reconnect_interval = 100;
+    pNewContext->pSockContext = pSockContext;
+    return do_connect(pNewContext, true);
+  }
+
+  return ENOSPC;
+}
+
+/**
+ * Bind a socket to the given IPv4 address and port.
+ *
+ * @param sock socket descriptor to bind
+ * @param bind_ipaddr dotted-quad address, or NULL / "" to bind to any address
+ * @param port port number in host byte order
+ * @return 0 on success, EINVAL for an unparsable address, otherwise the
+ *         errno of the failed bind() (ENOMEM when errno is 0)
+ */
+static int socket_bind(int sock, const char *bind_ipaddr, const int port)
+{
+  struct sockaddr_in bindaddr;
+  int result;
+
+  // zero the structure so sin_zero and any padding are well defined
+  memset(&bindaddr, 0, sizeof(bindaddr));
+  bindaddr.sin_family = AF_INET;
+  bindaddr.sin_port = htons(port);
+  if (bind_ipaddr == NULL || *bind_ipaddr == '\0') {
+    bindaddr.sin_addr.s_addr = htonl(INADDR_ANY);
+  }
+  else if (inet_aton(bind_ipaddr, &bindaddr.sin_addr) == 0) {
+    Error("file: "__FILE__", line: %d, "
+        "invalid ip address: %s", __LINE__, bind_ipaddr);
+    return EINVAL;
+  }
+
+  if (bind(sock, (struct sockaddr*)&bindaddr, sizeof(bindaddr)) < 0) {
+    // save errno first: the logging call may change it
+    result = errno != 0 ? errno : ENOMEM;
+    Error("file: "__FILE__", line: %d, "
+        "bind port %d failed, errno: %d, error info: %s",
+        __LINE__, port, result, strerror(result));
+    return result;
+  }
+
+  return 0;
+}
+
+/**
+ * Create a listening TCP socket with SO_REUSEADDR set.
+ *
+ * @param bind_ipaddr address to bind to, passed through to socket_bind()
+ * @param port port to listen on
+ * @param err_no out: 0 on success, otherwise an errno-style error code
+ * @return the listening socket descriptor, or -1 on failure (the socket is
+ *         closed before returning)
+ */
+static int socket_server(const char *bind_ipaddr, const int port, int *err_no)
+{
+  const int fd = socket(AF_INET, SOCK_STREAM, 0);
+  if (fd < 0) {
+    *err_no = errno != 0 ? errno : EMFILE;
+    Error("file: "__FILE__", line: %d, "
+        "socket create failed, errno: %d, error info: %s",
+        __LINE__, errno, strerror(errno));
+    return -1;
+  }
+
+  int reuse_addr = 1;
+  bool ready = false;
+  if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &reuse_addr, sizeof(int))<0) {
+    *err_no = errno != 0 ? errno : ENOMEM;
+    Error("file: "__FILE__", line: %d, "
+        "setsockopt failed, errno: %d, error info: %s",
+        __LINE__, errno, strerror(errno));
+  }
+  else if ((*err_no=socket_bind(fd, bind_ipaddr, port)) != 0) {
+    // socket_bind() has already logged the failure
+  }
+  else if (listen(fd, 1024) < 0) {
+    *err_no = errno != 0 ? errno : EINVAL;
+    Error("file: "__FILE__", line: %d, "
+        "listen port %d failed, errno: %d, error info: %s",
+        __LINE__, port, errno, strerror(errno));
+  }
+  else {
+    ready = true;
+  }
+
+  if (!ready) {
+    close(fd);
+    return -1;
+  }
+
+  *err_no = 0;
+  return fd;
+}
+
+/**
+ * Apply the standard options to a server-side socket: SO_LINGER switched
+ * off, optional send/receive timeouts, and TCP_NODELAY.
+ *
+ * Failing to set a timeout only logs a warning; failing to set SO_LINGER or
+ * TCP_NODELAY is an error.
+ *
+ * @param fd socket descriptor
+ * @param timeout send/receive timeout in seconds; <= 0 leaves them unset
+ * @return 0 on success, otherwise the errno of the failed setsockopt()
+ */
+static int set_server_sock_opt(int fd, const int timeout)
+{
+  int flags;
+  int result;
+  struct linger linger;
+
+  linger.l_onoff = 0;
+  linger.l_linger = 0;
+  if (setsockopt(fd, SOL_SOCKET, SO_LINGER,
+        &linger, (socklen_t)sizeof(struct linger)) < 0)
+  {
+    // save errno first: the logging call may change it
+    result = errno != 0 ? errno : ENOMEM;
+    Error("file: "__FILE__", line: %d, "
+        "setsockopt failed, errno: %d, error info: %s",
+        __LINE__, result, strerror(result));
+    return result;
+  }
+
+  if (timeout > 0) {
+    struct timeval waittime;
+    waittime.tv_sec = timeout;
+    waittime.tv_usec = 0;
+    if (setsockopt(fd, SOL_SOCKET, SO_SNDTIMEO,
+          &waittime, (socklen_t)sizeof(struct timeval)) < 0)
+    {
+      Warning("file: "__FILE__", line: %d, "
+          "setsockopt failed, errno: %d, error info: %s",
+          __LINE__, errno, strerror(errno));
+    }
+
+    if (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO,
+          &waittime, (socklen_t)sizeof(struct timeval)) < 0)
+    {
+      Warning("file: "__FILE__", line: %d, "
+          "setsockopt failed, errno: %d, error info: %s",
+          __LINE__, errno, strerror(errno));
+    }
+  }
+
+  flags = 1;
+  if (setsockopt(fd, IPPROTO_TCP, TCP_NODELAY,
+        (char *)&flags, sizeof(flags)) < 0)
+  {
+    result = errno != 0 ? errno : EINVAL;
+    Error("file: "__FILE__", line: %d, "
+        "setsockopt failed, errno: %d, error info: %s",
+        __LINE__, result, strerror(result));
+    return result;
+  }
+
+  return 0;
+}
+
+/**
+ * Set up the connection manager: open the listening socket on cluster_port,
+ * initialize the machine, nio, connection and session subsystems, and
+ * register the listener with the connect thread's event poll.
+ *
+ * The listener uses socket_contexts_pool[0] and a connect context flagged
+ * with is_accept.
+ *
+ * @param my_ip ip of this machine; when > 0 it is stored in my_machine_ip
+ *        and added to the machine list
+ * @return 0 on success, otherwise an errno-style error code
+ */
+int connection_manager_init(const unsigned int my_ip)
+{
+  ConnectContext *pConnectContext;
+  char bind_addr[IP_ADDRESS_SIZE];
+  int result;
+  int server_sock;
+
+  assert(MSG_HEADER_LENGTH % 16 == 0);
+  *bind_addr = '\0';
+  server_sock = socket_server(bind_addr, cluster_port, &result);
+  if (server_sock < 0) {
+    // socket_server() reports its error through result; errno may have
+    // been changed by the logging and close() calls made since
+    return result != 0 ? result : EIO;
+  }
+
+  if ((result=set_server_sock_opt(server_sock, 0)) != 0) {
+    close(server_sock);
+    return result;
+  }
+
+  if ((result=safe_nonblocking(server_sock)) != 0) {
+    close(server_sock);
+    return result;
+  }
+
+  if ((result=init_machines()) != 0) {
+    close(server_sock);
+    return result;
+  }
+
+  if (my_ip > 0) {
+    my_machine_ip = my_ip;
+    add_machine(my_ip, cluster_port);
+  }
+
+  if ((result=nio_init()) != 0 || (result=connection_init()) != 0
+      || (result=session_init()) != 0)
+  {
+    close(server_sock);
+    return result;
+  }
+
+  pConnectContext = alloc_connect_context();
+  if (pConnectContext == NULL) {
+    close(server_sock);
+    return ENOSPC;
+  }
+
+  // from here on the listening socket is owned by the listener context
+  pConnectContext->pSockContext = socket_contexts_pool + 0;
+  pConnectContext->is_accept = true;
+  pConnectContext->pSockContext->sock = server_sock;
+  if (connect_thread_context.ev_poll->attach(server_sock, EVENTIO_READ,
+        pConnectContext) < 0)
+  {
+    result = errno != 0 ? errno : ENOMEM;
+    Error("file: " __FILE__ ", line: %d, "
+        "event poll attach fail, errno: %d, error info: %s",
+        __LINE__, result, strerror(result));
+    return result;
+  }
+
+  return 0;
+}
+
+void connection_manager_destroy() /* placeholder: the body is empty, nothing set up by connection_manager_init() is released here */
+{
+}
+
+/**
+ * Start the connect worker thread (connect_worker_entrance).
+ *
+ * @return 0 when the thread was created, otherwise errno (ENOMEM when
+ *         errno is 0)
+ */
+int connection_manager_start()
+{
+  if (ink_thread_create(connect_worker_entrance, NULL) != 0) {
+    return 0;
+  }
+
+  // a zero thread handle means creation failed
+  const int result = errno != 0 ? errno : ENOMEM;
+  Error("file: "__FILE__", line: %d, "
+      "create thread failed, errno: %d, error info: %s",
+      __LINE__, result, strerror(result));
+  return result;
+}
+
+/**
+ * Release connections that have been in STATE_RECV_DATA for at least
+ * 1000 ms, or in STATE_CONNECTING for at least cluster_connect_timeout
+ * seconds.
+ *
+ * Candidates are collected first (at most MAX_TIMEOUT_SOCKET_COUNT per
+ * call) and released afterwards; the whole function runs while holding
+ * connect_thread_context.lock.
+ *
+ * @return always 0
+ */
+static int close_timeout_connections()
+{
+#define MAX_TIMEOUT_SOCKET_COUNT 64
+  ConnectContext *expired[MAX_TIMEOUT_SOCKET_COUNT];
+  int expired_count = 0;
+
+  ink_mutex_acquire(&connect_thread_context.lock);
+  ConnectContext **const ppEnd = connect_thread_context.connections +
+    connect_thread_context.connection_count;
+  for (ConnectContext **pp = connect_thread_context.connections;
+      pp < ppEnd; pp++)
+  {
+    ConnectContext *const pContext = *pp;
+    if (!pContext->need_check_timeout || pContext->pSockContext->sock < 0) {
+      continue;
+    }
+
+    bool timed_out;
+    if (pContext->state == STATE_RECV_DATA) {
+      timed_out = (CURRENT_MS() - pContext->server_start_time >= 1000);
+    }
+    else if (pContext->state == STATE_CONNECTING) {
+      timed_out = (CURRENT_MS() - pContext->connect_start_time >=
+          cluster_connect_timeout * 1000);
+    }
+    else {
+      timed_out = false;
+    }
+
+    if (!timed_out) {
+      continue;
+    }
+
+    expired[expired_count++] = pContext;
+    if (expired_count == MAX_TIMEOUT_SOCKET_COUNT) {
+      // batch is full; the rest is handled on a later call
+      break;
+    }
+  }
+
+  for (int k = 0; k < expired_count; k++) {
+    SocketContext *const pSock = expired[k]->pSockContext;
+    if (connect_thread_context.ev_poll->detach(pSock->sock) < 0) {
+      Error("file: " __FILE__ ", line: %d, "
+          "event poll detach #%d fail, errno: %d, error info: %s",
+          __LINE__, pSock->sock, errno, strerror(errno));
+    }
+
+    Debug(CLUSTER_DEBUG_TAG, "file: " __FILE__ ", line: %d, "
+        "close timeout %s connection #%d %s:%d, type: %c",
+        __LINE__, expired[k]->state == STATE_RECV_DATA ?
+        "recv" : "connect", pSock->sock,
+        pSock->machine->hostname,
+        pSock->machine->cluster_port,
+        pSock->connect_type);
+
+    release_connection(pSock, false);
+  }
+
+  ink_mutex_release(&connect_thread_context.lock);
+  return 0;
+}
+
+/**
+ * Walk the connections table and, for every entry whose socket is closed
+ * (sock < 0), either retry the connect or release the entry.
+ *
+ * - need_reconnect set: reconnect once reconnect_interval ms have passed
+ *   since the last attempt. The interval doubles on each retry, capped at
+ *   1000 ms for machines marked dead and 30000 ms otherwise.
+ * - need_reconnect clear: remove the connection and free its connect
+ *   socket context.
+ *
+ * The whole walk runs while holding connect_thread_context.lock, so the
+ * helpers are called with their lock flag set to false.
+ *
+ * @return always 0
+ */
+static int do_reconnect()
+{
+  ConnectContext **ppConnection;
+  ConnectContext **ppConnectionEnd;
+  SocketContext *pSockContext;
+  int max_reconnect_interval;
+
+  ink_mutex_acquire(&connect_thread_context.lock);
+  ppConnectionEnd = connect_thread_context.connections +
+    connect_thread_context.connection_count;
+  ppConnection = connect_thread_context.connections;
+  while (ppConnection < ppConnectionEnd) {
+    if (*ppConnection == NULL || (*ppConnection)->pSockContext == NULL) {
+      Warning("file: " __FILE__ ", line: %d, "
+          "pConnection: %p, pSockContext: %p", __LINE__,
+          *ppConnection, *ppConnection != NULL ? (*ppConnection)->pSockContext : NULL);
+      ppConnection++;
+      continue;
+    }
+
+    if ((*ppConnection)->pSockContext->sock >= 0) { //already in progress or connected
+      ppConnection++;
+      continue;
+    }
+
+    if (!(*ppConnection)->need_reconnect) { //should release
+      pSockContext = (*ppConnection)->pSockContext;
+      if (remove_connection(pSockContext, false) == 0) { //removed
+        // the table shrank: stay on this slot and refresh the end pointer
+        ppConnectionEnd = connect_thread_context.connections +
+          connect_thread_context.connection_count;
+      }
+      else {
+        ppConnection++;
+      }
+
+      free_connect_sock_context(pSockContext, false);
+      continue;
+    }
+
+    if ((*ppConnection)->connect_count <= 0) {
+      // do_connect() has not run for this entry yet (it increments
+      // connect_count). Move on instead of re-testing the same slot
+      // forever while holding the lock.
+      ppConnection++;
+      continue;
+    }
+
+    if (CURRENT_MS() - (*ppConnection)->connect_start_time <
+        (*ppConnection)->reconnect_interval)
+    {
+      // not due yet
+      ppConnection++;
+      continue;
+    }
+
+    // back off: double the interval up to the cap
+    (*ppConnection)->reconnect_interval *= 2;
+    if ((*ppConnection)->pSockContext->machine->dead) {
+      max_reconnect_interval = 1000;
+    }
+    else {
+      max_reconnect_interval = 30000;
+    }
+    if ((*ppConnection)->reconnect_interval > max_reconnect_interval) {
+      (*ppConnection)->reconnect_interval = max_reconnect_interval;
+    }
+    (*ppConnection)->need_check_timeout = false;
+    do_connect(*ppConnection, false);
+    ppConnection++;
+    // do_connect() may remove the entry, so refresh the end pointer
+    ppConnectionEnd = connect_thread_context.connections +
+      connect_thread_context.connection_count;
+  }
+  ink_mutex_release(&connect_thread_context.lock);
+
+  return 0;
+}
+
+/**
+ * Get the IPv4 address of the peer of a connected socket.
+ *
+ * @param sock connected socket descriptor
+ * @param buff out: the peer address as text; set to "" when it cannot be
+ *        determined or formatted
+ * @param bufferSize size of buff in bytes
+ * @return the peer address as stored in sin_addr.s_addr, or INADDR_NONE
+ *         when getpeername() fails
+ */
+static in_addr_t get_peer_ip(int sock, char *buff, const int bufferSize)
+{
+  struct sockaddr_in peer;
+  socklen_t peer_len = sizeof(peer);
+
+  memset(&peer, 0, sizeof(peer));
+  if (getpeername(sock, (struct sockaddr *)&peer, &peer_len) != 0) {
+    *buff = '\0';
+    return INADDR_NONE;
+  }
+
+  if (peer_len == 0 ||
+      inet_ntop(AF_INET, &peer.sin_addr, buff, bufferSize) == NULL)
+  {
+    *buff = '\0';
+  }
+
+  return peer.sin_addr.s_addr;
+}
+
+/**
+ * Set up a freshly accepted socket: make it non-blocking, enable
+ * TCP_NODELAY, map the peer to a known machine, allocate the accept socket
+ * context and connect context, then hand over to connection_handler().
+ *
+ * The socket is never closed here; the caller closes it when the return
+ * value is non-zero.
+ *
+ * @param incomesock the accepted socket descriptor
+ * @return 0 on success; ENOENT when the peer is not in the machine list;
+ *         ENOSPC when no socket or connect context is available; otherwise
+ *         the error of the failed socket setup call
+ */
+static int deal_income_connection(const int incomesock)
+{
+  char client_ip[IP_ADDRESS_SIZE];
+
+  const int nonblock_result = safe_nonblocking(incomesock);
+  if (nonblock_result != 0) {
+    return nonblock_result;
+  }
+  if (safe_setsockopt(incomesock, IPPROTO_TCP, TCP_NODELAY, SOCKOPT_ON, sizeof(int)) < 0) {
+    Error("file: "__FILE__", line: %d, "
+        "setsockopt failed, errno: %d, error info: %s",
+        __LINE__, errno, strerror(errno));
+    return errno != 0 ? errno : EINVAL;
+  }
+
+  const in_addr_t peer_ip = get_peer_ip(incomesock, client_ip, sizeof(client_ip));
+  ClusterMachine *const peer = get_machine(peer_ip, cluster_port);
+  if (peer == NULL) {
+    Debug(CLUSTER_DEBUG_TAG, "file: " __FILE__ ", line: %d, "
+        "client: %s not in my machine list",
+        __LINE__, client_ip);
+    return ENOENT;
+  }
+
+  /*
+  Debug(CLUSTER_DEBUG_TAG, "file: " __FILE__ ", line: %d, "
+      "income client_ip: %s, ip: %u == %u, sock: #%d", __LINE__,
+      client_ip, ip, machine->ip, incomesock);
+  */
+
+  SocketContext *const pAccepted = alloc_accept_sock_context(peer->ip);
+  if (pAccepted == NULL) {
+    Debug(CLUSTER_DEBUG_TAG, "file: " __FILE__ ", line: %d, "
+        "client: %s, too many income connections, exceeds %d",
+        __LINE__, client_ip, num_of_cluster_connections / 2);
+    return ENOSPC;
+  }
+
+  pAccepted->sock = incomesock;
+  pAccepted->machine = peer;
+
+  ConnectContext *const pContext = alloc_connect_context();
+  if (pContext == NULL) {
+    free_accept_sock_context(pAccepted);
+    return ENOSPC;
+  }
+
+  // the socket is already connected, so skip straight to the handshake
+  pContext->pSockContext = pAccepted;
+  pContext->state = STATE_CONNECTED;
+  pContext->need_check_timeout = true;
+  connection_handler(pContext, true);
+  return 0;
+}
+
+/**
+ * Accept one pending connection on the listening socket and set it up.
+ *
+ * @param pSockContext socket context holding the listening socket
+ * @return 0 when the caller should call again (a connection was accepted,
+ *         whether or not its setup succeeded, or accept() was interrupted);
+ *         otherwise the accept() error, EAGAIN meaning nothing is pending
+ */
+static int deal_accept_event(SocketContext *pSockContext)
+{
+  struct sockaddr_in inaddr;
+  socklen_t sockaddr_len = sizeof(inaddr);
+
+  const int incomesock = accept(pSockContext->sock,
+      (struct sockaddr*)&inaddr, &sockaddr_len);
+  if (incomesock >= 0) {
+    if (deal_income_connection(incomesock) != 0) {
+      // setup failed: drop the connection but keep accepting
+      close(incomesock);
+    }
+    return 0;
+  }
+
+  //error
+  const int result = errno != 0 ? errno : EAGAIN;
+  if (result == EINTR) {
+    Debug(CLUSTER_DEBUG_TAG, "file: "__FILE__", line: %d, "
+        "accept failed, errno: %d, error info: %s",
+        __LINE__, errno, strerror(errno));
+    return 0; //should try again
+  }
+
+  if (errno != EAGAIN) {
+    Error("file: "__FILE__", line: %d, "
+        "accept failed, errno: %d, error info: %s",
+        __LINE__, result, strerror(result));
+  }
+
+  return result;
+}
+
+/**
+ * Dispatch the events reported by the last poll of the connect thread.
+ *
+ * - listener entry (is_accept): accept until deal_accept_event() returns
+ *   non-zero
+ * - EVENTIO_ERROR: release the connection
+ * - EVENTIO_READ or EVENTIO_WRITE: continue via connection_handler()
+ *
+ * @param count number of ready entries in the event poll
+ * @return always 0
+ */
+static int deal_connect_events(const int count)
+{
+  //static int counter = 0;
+
+  for (int i=0; i<count; i++) {
+    const int events = connect_thread_context.ev_poll->getEvents(i);
+    ConnectContext *const pContext =
+      (ConnectContext *)connect_thread_context.ev_poll->getData(i);
+    SocketContext *const pSock = pContext->pSockContext;
+
+    if (pContext->is_accept) {
+      int accept_result;
+      do {
+        accept_result = deal_accept_event(pSock);
+      } while (accept_result == 0);
+      continue;
+    }
+
+    /*
+    Debug(CLUSTER_DEBUG_TAG, "file: "__FILE__", line: %d, "
+        "%d. connections #%d %s:%d, type: %c, poll events: %d", __LINE__,
+        ++counter, pSockContext->sock, pSockContext->machine->hostname,
+        pSockContext->machine->cluster_port, pSockContext->connect_type,
+        events);
+    */
+
+    if ((events & EVENTIO_ERROR) != 0) {
+      Debug(CLUSTER_DEBUG_TAG, "file: " __FILE__ ", line: %d, "
+          "connect %s %s:%d fail, connection closed",
+          __LINE__, pSock->connect_type == CONNECT_TYPE_SERVER ?
+          "from" : "to", pSock->machine->hostname,
+          pSock->machine->cluster_port);
+      release_connection(pSock, true);
+      continue;
+    }
+
+    /*
+    Debug(CLUSTER_DEBUG_TAG, "file: " __FILE__ ", line: %d, "
+        "====in: %d, out: %d====", __LINE__, (events & EVENTIO_READ),
+        (events & EVENTIO_WRITE));
+    */
+
+    if ((events & (EVENTIO_READ | EVENTIO_WRITE)) != 0) {
+      connection_handler(pContext, true);
+    }
+  }
+
+  return 0;
+}
+
+/**
+ * Main loop of the connect thread.
+ *
+ * Each iteration logs the session and nio statistics when more than one
+ * second has passed since the last log, optionally logs trigger / message
+ * timing statistics every 60 seconds, retries closed connections, and polls
+ * the event set. A poll timeout triggers the timeout sweep; a poll error
+ * other than EINTR is fatal.
+ *
+ * The loop never exits; the argument is unused.
+ */
+void *connect_worker_entrance(void * /* arg */)
+{
+#if defined(HAVE_SYS_PRCTL_H) && defined(PR_SET_NAME)
+  prctl(PR_SET_NAME, "[ET_CLUSTER 0]", 0, 0, 0);
+#endif
+
+  time_t last_cluster_stat_time = CURRENT_TIME();
+#if defined(TRIGGER_STAT_FLAG) || defined(MSG_TIME_STAT_FLAG)
+  time_t last_msg_stat_time = CURRENT_TIME();
+#endif
+
+  for (;;) {
+    if (CURRENT_TIME() - last_cluster_stat_time > 1) {
+      log_session_stat();
+      log_nio_stats();
+      last_cluster_stat_time = CURRENT_TIME();
+    }
+
+#if defined(TRIGGER_STAT_FLAG) || defined(MSG_TIME_STAT_FLAG)
+    if (CURRENT_TIME() - last_msg_stat_time >= 60) {
+#ifdef TRIGGER_STAT_FLAG
+      log_trigger_stat();
+#endif
+
+#ifdef MSG_TIME_STAT_FLAG
+      log_msg_time_stat();
+#endif
+
+      last_msg_stat_time = CURRENT_TIME();
+    }
+#endif
+
+    // entry 0 is the listener; only bother when other connections exist
+    if (connect_thread_context.connection_count > 1) {
+      do_reconnect();
+    }
+
+    const int ready = connect_thread_context.ev_poll->poll();
+    if (ready > 0) {
+      deal_connect_events(ready);
+      continue;
+    }
+
+    if (ready == 0) { //timeout
+      if (connect_thread_context.connection_count > 1) {
+        close_timeout_connections();
+      }
+      continue;
+    }
+
+    if (errno != EINTR) {
+      ink_fatal(1, "file: "__FILE__", line: %d, "
+          "call event poll fail, errno: %d, error info: %s\n",
+          __LINE__, errno, strerror(errno));
+    }
+  }
+
+  return NULL;
+}
+
+/**
+ * Append a socket context to the connected list of its machine, growing
+ * the list when it is full (64 slots at first, then doubling).
+ *
+ * The new capacity is committed only after the allocation has succeeded,
+ * so a failed grow leaves the list exactly as it was.
+ *
+ * @param pSockContext socket context to add; its machine must be set
+ * @return 0 on success, ENOENT when the machine is unknown, otherwise the
+ *         errno of the failed allocation (ENOMEM when errno is 0)
+ */
+int add_machine_sock_context(SocketContext *pSockContext)
+{
+  SocketContextArray *contextArray;
+  SocketContext **oldContexts;
+  SocketContext **newContexts;
+  unsigned int new_alloc_size;
+  int bytes;
+  int machine_id;
+  int result;
+
+  if ((machine_id=get_machine_index(pSockContext->machine->ip)) < 0) {
+    return ENOENT;
+  }
+
+  ink_mutex_acquire(&connect_thread_context.lock);
+  contextArray = &machine_sockets[machine_id].connected_list;
+  if (contextArray->count >= contextArray->alloc_size) {
+    if (contextArray->alloc_size == 0) {
+      new_alloc_size = 64;
+    }
+    else {
+      new_alloc_size = contextArray->alloc_size * 2;
+    }
+
+    bytes = sizeof(SocketContext *) * new_alloc_size;
+    newContexts = (SocketContext **)malloc(bytes);
+    if (newContexts == NULL) {
+      // save errno first: the logging call may change it
+      result = errno != 0 ? errno : ENOMEM;
+      Error("file: "__FILE__", line: %d, "
+          "malloc %d bytes fail, errno: %d, error info: %s",
+          __LINE__, bytes, result, strerror(result));
+      ink_mutex_release(&connect_thread_context.lock);
+      return result;
+    }
+
+    memset(newContexts, 0, bytes);
+    if (contextArray->count > 0) {
+      memcpy(newContexts, contextArray->contexts,
+          sizeof(SocketContext *) * contextArray->count);
+    }
+
+    // NOTE(review): get_socket_context() reads contexts without taking
+    // this lock, so freeing the old array here can race with a reader.
+    oldContexts = contextArray->contexts;
+    contextArray->contexts = newContexts;
+    contextArray->alloc_size = new_alloc_size;
+    free(oldContexts);
+  }
+
+  contextArray->contexts[contextArray->count++] = pSockContext;
+  ink_mutex_release(&connect_thread_context.lock);
+
+  return 0;
+}
+
+/**
+ * Remove a socket context from the connected list of its machine, keeping
+ * the remaining entries in order.
+ *
+ * @param pSockContext socket context to remove; its machine must be set
+ * @return 0 when removed, ENOENT when the machine is unknown or the socket
+ *         context is not in the list
+ */
+int remove_machine_sock_context(SocketContext *pSockContext)
+{
+  const int machine_id = get_machine_index(pSockContext->machine->ip);
+  if (machine_id < 0) {
+    return ENOENT;
+  }
+
+  ink_mutex_acquire(&connect_thread_context.lock);
+  SocketContextArray *const list = &machine_sockets[machine_id].connected_list;
+
+  // locate the entry; pos ends up equal to count when it is absent
+  unsigned int pos = 0;
+  while (pos < list->count && list->contexts[pos] != pSockContext) {
+    pos++;
+  }
+
+  if (pos == list->count) {
+    ink_mutex_release(&connect_thread_context.lock);
+    return ENOENT;
+  }
+
+  // close the gap by shifting the tail one slot to the left
+  for (; pos + 1 < list->count; pos++) {
+    list->contexts[pos] = list->contexts[pos + 1];
+  }
+  list->contexts[--list->count] = NULL;
+  ink_mutex_release(&connect_thread_context.lock);
+
+  return 0;
+}
+
+/**
+ * Pick one connected socket context of the given machine.
+ *
+ * Successive calls walk through the connected list: a shared index is
+ * atomically incremented and taken modulo the current entry count. The
+ * list itself is read without taking connect_thread_context.lock.
+ *
+ * @param machine the peer machine
+ * @return a socket context from the machine's connected list, or NULL when
+ *         the machine is unknown or has no connected sockets
+ */
+SocketContext *get_socket_context(const ClusterMachine *machine)
+{
+  const int machine_id = get_machine_index(machine->ip);
+  if (machine_id < 0) {
+    Debug(CLUSTER_DEBUG_TAG, "file: "__FILE__", line: %d, "
+        "the index of ip addr: %s not exist", __LINE__, machine->hostname);
+    return NULL;
+  }
+
+  SocketContextArray *const pList = &machine_sockets[machine_id].connected_list;
+  const int context_count = pList->count;
+  if (context_count <= 0) {
+    /*
+    Debug(CLUSTER_DEBUG_TAG, "file: "__FILE__", line: %d, "
+        "the socket context count of ip addr: %s is zero",
+        __LINE__, machine->hostname);
+    */
+    return NULL;
+  }
+
+  const unsigned int context_index =
+    ink_atomic_increment(&pList->index, 1) % context_count;
+  return pList->contexts[context_index];
+}
+
http://git-wip-us.apache.org/repos/asf/trafficserver/blob/62504a9f/iocore/cluster/connection.h
----------------------------------------------------------------------
diff --git a/iocore/cluster/connection.h b/iocore/cluster/connection.h
new file mode 100644
index 0000000..ac53ff0
--- /dev/null
+++ b/iocore/cluster/connection.h
@@ -0,0 +1,75 @@
+/** @file
+
+ A brief file description
+
+ @section license License
+
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ */
+
+#ifndef _CONNECTION_H_
+#define _CONNECTION_H_
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "types.h"
+
+typedef struct socket_context_array { // growable array of socket context pointers
+ SocketContext **contexts; // heap array; slots [0, count) are in use, unused slots are NULL
+ unsigned int alloc_size; // number of slots allocated in contexts (64 at first, doubled when full)
+ unsigned int count; // number of entries currently stored
+ volatile unsigned int index; // selection cursor, atomically incremented by get_socket_context()
+} SocketContextArray;
+
+typedef struct socket_context_by_machine { // per-machine socket bookkeeping
+ unsigned int ip; // presumably the ip of the machine these sockets belong to -- TODO confirm
+ socket_context_array connected_list; // connected sockets, maintained by add/remove_machine_sock_context()
+ SocketContext *accept_free_list; // socket contexts allocated for accepted connections
+ SocketContext *connect_free_list; // socket contexts allocated for outgoing connections
+} SocketContextsByMachine;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+int connection_init(); // allocates the connect thread's tables and event poll; 0 or an errno-style code
+void connection_destroy(); // currently an empty placeholder
+
+int connection_manager_init(const unsigned int my_ip); // opens the listener on cluster_port and initializes the cluster subsystems; 0 or an errno-style code
+void connection_manager_destroy(); // currently an empty placeholder
+int connection_manager_start(); // starts the connect worker thread; 0 or an errno-style code
+
+int log_message_stat(void *arg);
+
+SocketContext *get_socket_context(const ClusterMachine *machine); // picks a connected socket of the machine; NULL when there is none
+
+void free_accept_sock_context(SocketContext *pSockContext);
+
+int machine_make_connections(ClusterMachine *m); // starts num_of_cluster_connections / 2 outgoing connections to m
+int machine_stop_reconnect(ClusterMachine *m); // clears need_reconnect on all connections to m; ENOENT when none match
+int make_connection(SocketContext *pSockContext); // EEXIST if already registered, ENOSPC if no context is free, else the connect result
+
+int add_machine_sock_context(SocketContext *pSockContext); // appends to the machine's connected list; 0 or an errno-style code
+int remove_machine_sock_context(SocketContext *pSockContext); // removes from the machine's connected list; ENOENT when absent
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
+
http://git-wip-us.apache.org/repos/asf/trafficserver/blob/62504a9f/iocore/cluster/global.cc
----------------------------------------------------------------------
diff --git a/iocore/cluster/global.cc b/iocore/cluster/global.cc
new file mode 100644
index 0000000..0556763
--- /dev/null
+++ b/iocore/cluster/global.cc
@@ -0,0 +1,40 @@
+/** @file
+
+ A brief file description
+
+ @section license License
+
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ */
+
+#include "global.h"
+
+int cluster_connect_timeout = 1; // seconds; multiplied by 1000 and compared against elapsed milliseconds when closing timed-out connects
+
+//cluster flow control
+int64_t cluster_flow_ctrl_min_bps = 0; // in bits (presumably bits per second, going by the _bps suffix -- TODO confirm)
+int64_t cluster_flow_ctrl_max_bps = 0; // in bits (presumably bits per second, going by the _bps suffix -- TODO confirm)
+int cluster_send_min_wait_time = 1000; // microseconds
+int cluster_send_max_wait_time = 5000; // microseconds
+int cluster_min_loop_interval = 0; // microseconds
+int cluster_max_loop_interval = 1000; // microseconds
+int64_t cluster_ping_send_interval= 0;
+int64_t cluster_ping_latency_threshold = 0;
+int cluster_ping_retries = 3;
+int max_session_count_per_machine = 1000000;
+int session_lock_count_per_machine = 10949;
+
http://git-wip-us.apache.org/repos/asf/trafficserver/blob/62504a9f/iocore/cluster/global.h
----------------------------------------------------------------------
diff --git a/iocore/cluster/global.h b/iocore/cluster/global.h
new file mode 100644
index 0000000..7fed404
--- /dev/null
+++ b/iocore/cluster/global.h
@@ -0,0 +1,61 @@
+/** @file
+
+ A brief file description
+
+ @section license License
+
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ */
+
+#ifndef _GLOBAL_H
+#define _GLOBAL_H
+
+#include <stdint.h>
+#include "types.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+extern int num_of_cluster_threads;
+extern int num_of_cluster_connections; // must be an even number: half are outgoing connects, the other half is the limit for accepted connections
+extern int cluster_send_buffer_size;
+extern int cluster_receive_buffer_size;
+extern int cluster_connect_timeout; // seconds
+
+//cluster flow control
+extern int64_t cluster_flow_ctrl_min_bps; // in bits (presumably bits per second -- TODO confirm)
+extern int64_t cluster_flow_ctrl_max_bps; // in bits (presumably bits per second -- TODO confirm)
+extern int cluster_send_min_wait_time; // microseconds
+extern int cluster_send_max_wait_time; // microseconds
+extern int cluster_min_loop_interval; // microseconds
+extern int cluster_max_loop_interval; // microseconds
+
+//cluster ping
+extern int64_t cluster_ping_send_interval;
+extern int64_t cluster_ping_latency_threshold;
+extern int cluster_ping_retries;
+
+extern int max_session_count_per_machine;
+extern int session_lock_count_per_machine;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
+
http://git-wip-us.apache.org/repos/asf/trafficserver/blob/62504a9f/iocore/cluster/machine.cc
----------------------------------------------------------------------
diff --git a/iocore/cluster/machine.cc b/iocore/cluster/machine.cc
new file mode 100644
index 0000000..18ac1a9
--- /dev/null
+++ b/iocore/cluster/machine.cc
@@ -0,0 +1,269 @@
+/** @file
+
+ A brief file description
+
+ @section license License
+
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ */
+
+#include <time.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <netinet/in.h>
+#include <fcntl.h>
+#include "Diags.h"
+#include "global.h"
+#include "nio.h"
+#include "connection.h"
+#include "message.h"
+#include "machine.h"
+#include "ink_config.h"
+#include "P_Cluster.h"
+
+unsigned int my_machine_ip = 0; // address of this node; presumably in the same form as the ip passed to add_machine() -- confirm
+int cluster_machine_count = 0; // number of machines registered so far (entries in use in both tables below)
+
+ClusterMachine *cluster_machines = NULL; // machine records, allocated by init_machines() with MAX_MACHINE_COUNT slots
+static ClusterMachine **sorted_machines = NULL; // pointers to the records above, ordered by ip and then cluster_port
+static ink_mutex machine_lock; // held while registering machines and while updating their connection count / dead flag
+
+static ClusterMachine *do_add_machine(ClusterMachine *m, int *result); // defined after compare_machine()
+
+/**
+ * Register a cluster peer identified by an IPv4 address and a port.
+ *
+ * The textual form of the address doubles as the machine's hostname.
+ * do_add_machine() stores its own heap copy of that name, so the temporary
+ * record built here simply borrows the buffer returned by inet_ntoa();
+ * duplicating it with strdup() (as was done before) leaked one string on
+ * every call because nothing ever freed the duplicate.
+ *
+ * @param ip   address in the form stored in in_addr.s_addr.
+ * @param port cluster port of the peer.
+ * @return whatever do_add_machine() returns: the newly created record, the
+ *         already registered record for the same ip/port, or NULL when the
+ *         machine table is full.
+ */
+ClusterMachine *add_machine(const unsigned int ip, const int port)
+{
+  ClusterMachine machine;
+  struct in_addr in;
+  int result;
+
+  memset(&machine, 0, sizeof(machine));
+  in.s_addr = ip;
+
+  // Borrowed pointer into inet_ntoa()'s buffer: only valid until the next
+  // inet_ntoa() call, which is long enough for do_add_machine() to copy it.
+  machine.hostname = inet_ntoa(in);
+  machine.hostname_len = strlen(machine.hostname);
+  machine.cluster_port = port;
+  machine.ip = ip;
+
+  return do_add_machine(&machine, &result);
+}
+
+/**
+ * Initialise the machine registry: the lock, the record table and the
+ * sorted index, each sized for MAX_MACHINE_COUNT entries and zero-filled.
+ *
+ * @return 0 on success, the error from ink_mutex_init(), or ENOMEM when
+ *         one of the tables cannot be allocated. On ENOMEM no table is
+ *         left allocated.
+ */
+int init_machines()
+{
+  int result;
+  int bytes;
+
+  if ((result = ink_mutex_init(&machine_lock, "machine_lock")) != 0) {
+    return result;
+  }
+
+  cluster_machine_count = 0;
+  bytes = sizeof(ClusterMachine) * MAX_MACHINE_COUNT;
+  cluster_machines = (ClusterMachine *)malloc(bytes);
+  if (cluster_machines == NULL) {
+    Error("file: "__FILE__", line: %d, "
+        "malloc %d bytes fail!", __LINE__, bytes);
+    return ENOMEM;
+  }
+  memset(cluster_machines, 0, bytes);
+
+  bytes = sizeof(ClusterMachine *) * MAX_MACHINE_COUNT;
+  sorted_machines = (ClusterMachine **)malloc(bytes);
+  if (sorted_machines == NULL) {
+    Error("file: "__FILE__", line: %d, "
+        "malloc %d bytes fail!", __LINE__, bytes);
+    // Do not leave the record table behind without its index.
+    free(cluster_machines);
+    cluster_machines = NULL;
+    return ENOMEM;
+  }
+  memset(sorted_machines, 0, bytes);
+
+  return 0;
+}
+
+/**
+ * Ordering callback for the sorted machine index (bsearch-compatible).
+ *
+ * Both arguments point at ClusterMachine pointers. Records are ordered by
+ * ip first; records with the same ip are ordered by cluster_port.
+ *
+ * @return negative, zero or positive when *p1 sorts before, equal to or
+ *         after *p2.
+ */
+static int compare_machine(const void *p1, const void *p2)
+{
+  const ClusterMachine *lhs = *((const ClusterMachine **)p1);
+  const ClusterMachine *rhs = *((const ClusterMachine **)p2);
+
+  if (lhs->ip != rhs->ip) {
+    return lhs->ip < rhs->ip ? -1 : 1;
+  }
+  return lhs->cluster_port - rhs->cluster_port;
+}
+
+/**
+ * Insert a machine into the registry, keeping the sorted index ordered.
+ *
+ * Runs entirely under machine_lock. The hostname is copied into a fresh
+ * heap buffer, so the caller keeps ownership of m and m->hostname.
+ *
+ * The hostname copy is allocated BEFORE the sorted index is touched. If
+ * the index were shifted first (as it used to be), an allocation failure
+ * would leave the index with a slot pointing at a half-initialised record
+ * and with its last valid entry pushed out of the counted range.
+ *
+ * @param m      template record; ip, cluster_port, hostname and
+ *               hostname_len are read. hostname must hold hostname_len + 1
+ *               readable bytes when hostname_len is non-zero.
+ * @param result receives 0 (added), EEXIST (already registered), ENOSPC
+ *               (table full) or ENOMEM (hostname copy failed).
+ * @return the registered record for 0 and EEXIST, NULL otherwise.
+ */
+static ClusterMachine *do_add_machine(ClusterMachine *m, int *result)
+{
+  ClusterMachine **ppMachine;
+  ClusterMachine **ppMachineEnd;
+  ClusterMachine **pp;
+  ClusterMachine *pMachine = NULL;
+  char *hostname = NULL;
+  int cr = -1;
+
+  ink_mutex_acquire(&machine_lock);
+
+  // Find the first indexed machine that is not smaller than m: either the
+  // machine itself (cr == 0) or the insertion point.
+  ppMachineEnd = sorted_machines + cluster_machine_count;
+  for (ppMachine = sorted_machines; ppMachine < ppMachineEnd; ppMachine++) {
+    cr = compare_machine(&m, ppMachine);
+    if (cr <= 0) {
+      break;
+    }
+  }
+
+  do {
+    if (cr == 0) { // found
+      pMachine = *ppMachine;
+      *result = EEXIST;
+      break;
+    }
+
+    if (cluster_machine_count >= MAX_MACHINE_COUNT) {
+      Error("file: "__FILE__", line: %d, "
+          "host: %s:%u, exceeds max machine: %d!", __LINE__, m->hostname,
+          m->cluster_port, MAX_MACHINE_COUNT);
+      *result = ENOSPC;
+      break;
+    }
+
+    // Acquire everything that can fail before modifying shared state.
+    if (m->hostname_len != 0) {
+      hostname = (char *)malloc(m->hostname_len + 1);
+      if (hostname == NULL) {
+        Error("file: "__FILE__", line: %d, "
+            "malloc %d bytes fail!", __LINE__, m->hostname_len + 1);
+        *result = ENOMEM;
+        break;
+      }
+      memcpy(hostname, m->hostname, m->hostname_len + 1);
+    }
+
+    // Open a gap at the insertion point by shifting the tail up one slot.
+    for (pp = ppMachineEnd; pp > ppMachine; pp--) {
+      *pp = *(pp - 1);
+    }
+
+    pMachine = cluster_machines + cluster_machine_count; // first unused record
+    *ppMachine = pMachine;
+
+    pMachine->dead = true; // stays dead until machine_up_notify()
+    pMachine->ip = m->ip;
+    pMachine->cluster_port = m->cluster_port;
+    pMachine->hostname_len = m->hostname_len;
+    pMachine->hostname = hostname;
+
+    cluster_machine_count++;
+    *result = 0;
+  } while (0);
+
+  ink_mutex_release(&machine_lock);
+  return pMachine;
+}
+
+/**
+ * Look up a registered machine by ip and cluster port.
+ *
+ * Performs a binary search over the sorted index using a stack-allocated
+ * key record; machine_lock is not taken here.
+ *
+ * @return the matching record, or NULL when no such machine is registered.
+ */
+ClusterMachine *get_machine(const unsigned int ip, const int port)
+{
+  ClusterMachine key;
+  ClusterMachine *key_ptr = &key;
+  ClusterMachine **slot;
+
+  memset(&key, 0, sizeof(key));
+  key.ip = ip;
+  key.cluster_port = port;
+
+  // The index holds pointers, so the search key is a pointer to a pointer.
+  slot = (ClusterMachine **)bsearch(&key_ptr, sorted_machines,
+      cluster_machine_count, sizeof(ClusterMachine *), compare_machine);
+
+  return slot != NULL ? *slot : NULL;
+}
+
+/**
+ * Mark a machine as alive.
+ *
+ * Under machine_lock: if the machine is currently flagged dead, the flag is
+ * cleared and cluster_machine_change_notify() is invoked for it. A machine
+ * that is already alive is left untouched.
+ *
+ * @return 0, or ENOENT when machine is NULL.
+ */
+int machine_up_notify(ClusterMachine *machine)
+{
+  if (machine != NULL) {
+    ink_mutex_acquire(&machine_lock);
+
+    Debug(CLUSTER_DEBUG_TAG, "file: "__FILE__", line: %d, "
+        "machine_up_notify, %s connection count: %d, dead: %d",
+        __LINE__, machine->hostname, machine->now_connections, machine->dead);
+
+    if (machine->dead) {
+      machine->dead = false;
+      cluster_machine_change_notify(machine);
+    }
+
+    ink_mutex_release(&machine_lock);
+    return 0;
+  }
+
+  return ENOENT;
+}
+
+/**
+ * Hand a socket to nio_add_to_epoll() and, on success, count it against
+ * its machine. Both steps happen under machine_lock.
+ *
+ * @return 0 on success, otherwise the error from nio_add_to_epoll() (the
+ *         connection count is not changed in that case).
+ */
+int machine_add_connection(SocketContext *pSockContext)
+{
+  int result;
+  int connections = 0;
+
+  ink_mutex_acquire(&machine_lock);
+  result = nio_add_to_epoll(pSockContext);
+  if (result == 0) {
+    connections = ++pSockContext->machine->now_connections;
+  }
+  ink_mutex_release(&machine_lock);
+
+  if (result != 0) {
+    return result;
+  }
+
+  Debug(CLUSTER_DEBUG_TAG, "file: "__FILE__", line: %d, "
+      "%s add %c connection count: %d, dead: %d", __LINE__,
+      pSockContext->machine->hostname, pSockContext->connect_type,
+      connections, pSockContext->machine->dead);
+
+  return 0;
+}
+
+/**
+ * Detach a socket from its machine via remove_machine_sock_context() and
+ * decrement the machine's connection count, all under machine_lock.
+ *
+ * When the count drops to zero and the machine is not yet flagged dead,
+ * it is flagged dead and cluster_machine_change_notify() is invoked.
+ *
+ * @return 0 on success, otherwise the error from
+ *         remove_machine_sock_context() (nothing else is changed then).
+ */
+int machine_remove_connection(SocketContext *pSockContext)
+{
+  int remaining = 0;
+  int result;
+
+  ink_mutex_acquire(&machine_lock);
+  result = remove_machine_sock_context(pSockContext);
+  if (result == 0) {
+    remaining = --pSockContext->machine->now_connections;
+    if (remaining == 0 && !pSockContext->machine->dead) { // last connection gone
+      pSockContext->machine->dead = true;
+      cluster_machine_change_notify(pSockContext->machine);
+    }
+  }
+  ink_mutex_release(&machine_lock);
+
+  if (result != 0) {
+    return result;
+  }
+
+  Debug(CLUSTER_DEBUG_TAG, "file: "__FILE__", line: %d, "
+      "%s remove %c connection count: %d, dead: %d", __LINE__,
+      pSockContext->machine->hostname, pSockContext->connect_type,
+      remaining, pSockContext->machine->dead);
+
+  return 0;
+}
+
http://git-wip-us.apache.org/repos/asf/trafficserver/blob/62504a9f/iocore/cluster/machine.h
----------------------------------------------------------------------
diff --git a/iocore/cluster/machine.h b/iocore/cluster/machine.h
new file mode 100644
index 0000000..8ea6981
--- /dev/null
+++ b/iocore/cluster/machine.h
@@ -0,0 +1,51 @@
+/** @file
+
+ A brief file description
+
+ @section license License
+
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ */
+
+#ifndef _MACHINE_H
+#define _MACHINE_H
+
+#include "types.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+extern unsigned int my_machine_ip; // defined in machine.cc
+extern int cluster_machine_count; // number of machines registered so far
+extern struct ClusterMachine *cluster_machines; // machine record table, allocated by init_machines()
+
+int init_machines(); // sets up the lock and the machine tables; returns 0 or an errno-style code
+ClusterMachine *add_machine(const unsigned int ip, const int port); // registers ip:port; returns the existing record if already registered
+
+ClusterMachine *get_machine(const unsigned int ip, const int port); // returns NULL when ip:port is not registered
+
+int machine_up_notify(ClusterMachine *machine); // clears the dead flag and fires the change callback; ENOENT for NULL
+int machine_add_connection(SocketContext *pSockContext); // calls nio_add_to_epoll() and increments the machine's connection count
+int machine_remove_connection(SocketContext *pSockContext); // decrements the count; flags the machine dead when it reaches zero
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
+
[6/6] git commit: refine the codes of cluster
Posted by we...@apache.org.
refine the codes of cluster
Project: http://git-wip-us.apache.org/repos/asf/trafficserver/repo
Commit: http://git-wip-us.apache.org/repos/asf/trafficserver/commit/62504a9f
Tree: http://git-wip-us.apache.org/repos/asf/trafficserver/tree/62504a9f
Diff: http://git-wip-us.apache.org/repos/asf/trafficserver/diff/62504a9f
Branch: refs/heads/refine_cluster
Commit: 62504a9f82e7e862015aeb2393cda981ff78789e
Parents: 27246a5
Author: weijin <we...@apache.org>
Authored: Fri Oct 11 16:03:18 2013 +0800
Committer: weijin <ta...@taobao.com>
Committed: Wed Dec 4 11:37:40 2013 +0800
----------------------------------------------------------------------
iocore/cache/Cache.cc | 2 +-
iocore/cache/CacheRead.cc | 2 +-
iocore/cache/I_Cache.h | 2 +-
iocore/cache/P_Cache.h | 2 +
iocore/cache/P_CacheInternal.h | 7 +-
iocore/cluster/ClusterCache.cc | 4679 +++++++++++++++-----------
iocore/cluster/ClusterConfig.cc | 80 +-
iocore/cluster/ClusterMachine.cc | 14 +-
iocore/cluster/ClusterProcessor.cc | 623 ++--
iocore/cluster/ClusterVConnection.cc | 548 ++-
iocore/cluster/EventPoll.cc | 158 +
iocore/cluster/EventPoll.h | 105 +
iocore/cluster/Makefile.am | 9 +-
iocore/cluster/P_Cluster.h | 2 +
iocore/cluster/P_ClusterCache.h | 367 +-
iocore/cluster/P_ClusterCacheInternal.h | 374 +-
iocore/cluster/P_ClusterInline.h | 232 +-
iocore/cluster/clusterinterface.h | 104 +
iocore/cluster/connection.cc | 1726 ++++++++++
iocore/cluster/connection.h | 75 +
iocore/cluster/global.cc | 40 +
iocore/cluster/global.h | 61 +
iocore/cluster/machine.cc | 269 ++
iocore/cluster/machine.h | 51 +
iocore/cluster/message.cc | 229 ++
iocore/cluster/message.h | 75 +
iocore/cluster/nio.cc | 1701 ++++++++++
iocore/cluster/nio.h | 60 +
iocore/cluster/session.cc | 1267 +++++++
iocore/cluster/session.h | 97 +
iocore/cluster/types.h | 235 ++
iocore/eventsystem/I_Event.h | 1 +
iocore/eventsystem/P_IOBuffer.h | 20 +-
mgmt/RecordsConfig.cc | 18 +
34 files changed, 10851 insertions(+), 2384 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/trafficserver/blob/62504a9f/iocore/cache/Cache.cc
----------------------------------------------------------------------
diff --git a/iocore/cache/Cache.cc b/iocore/cache/Cache.cc
index 66f2b70..8d4b1e5 100644
--- a/iocore/cache/Cache.cc
+++ b/iocore/cache/Cache.cc
@@ -82,7 +82,7 @@ int cache_config_read_while_writer = 0;
char cache_system_config_directory[PATH_NAME_MAX + 1];
int cache_config_mutex_retry_delay = 2;
#ifdef HTTP_CACHE
-static int enable_cache_empty_http_doc = 0;
+int enable_cache_empty_http_doc = 0;
#endif
#if TS_USE_INTERIM_CACHE == 1
http://git-wip-us.apache.org/repos/asf/trafficserver/blob/62504a9f/iocore/cache/CacheRead.cc
----------------------------------------------------------------------
diff --git a/iocore/cache/CacheRead.cc b/iocore/cache/CacheRead.cc
index 3c97305..90a7bd5 100644
--- a/iocore/cache/CacheRead.cc
+++ b/iocore/cache/CacheRead.cc
@@ -722,7 +722,7 @@ CacheVC::openReadMain(int /* event ATS_UNUSED */, Event * /* e ATS_UNUSED */)
return EVENT_DONE;
// we have to keep reading until we give the user all the
// bytes it wanted or we hit the watermark.
- if (vio.ntodo() > 0 && !vio.buffer.writer()->high_water())
+ if (!f.cluster && vio.ntodo() > 0 && !vio.buffer.writer()->high_water())
goto Lread;
return EVENT_CONT;
}
http://git-wip-us.apache.org/repos/asf/trafficserver/blob/62504a9f/iocore/cache/I_Cache.h
----------------------------------------------------------------------
diff --git a/iocore/cache/I_Cache.h b/iocore/cache/I_Cache.h
index 00e4791..cd8dda9 100644
--- a/iocore/cache/I_Cache.h
+++ b/iocore/cache/I_Cache.h
@@ -193,7 +193,7 @@ struct CacheVConnection:public VConnection
virtual void set_http_info(CacheHTTPInfo *info) = 0;
virtual void get_http_info(CacheHTTPInfo **info) = 0;
#endif
-
+ virtual bool is_read_from_writer() = 0;
virtual bool is_ram_cache_hit() const = 0;
virtual bool set_disk_io_priority(int priority) = 0;
virtual int get_disk_io_priority() = 0;
http://git-wip-us.apache.org/repos/asf/trafficserver/blob/62504a9f/iocore/cache/P_Cache.h
----------------------------------------------------------------------
diff --git a/iocore/cache/P_Cache.h b/iocore/cache/P_Cache.h
index ed738e3..e6bba36 100644
--- a/iocore/cache/P_Cache.h
+++ b/iocore/cache/P_Cache.h
@@ -44,4 +44,6 @@
#include "P_CacheInternal.h"
#include "P_CacheHosting.h"
#include "P_CacheHttp.h"
+#include "clusterinterface.h"
+
#endif /* _P_CACHE_H */
http://git-wip-us.apache.org/repos/asf/trafficserver/blob/62504a9f/iocore/cache/P_CacheInternal.h
----------------------------------------------------------------------
diff --git a/iocore/cache/P_CacheInternal.h b/iocore/cache/P_CacheInternal.h
index 4f33fdc..e5f0a11 100644
--- a/iocore/cache/P_CacheInternal.h
+++ b/iocore/cache/P_CacheInternal.h
@@ -368,7 +368,9 @@ struct CacheVC: public CacheVConnection
virtual time_t get_pin_in_cache();
virtual bool set_disk_io_priority(int priority);
virtual int get_disk_io_priority();
-
+ virtual bool is_read_from_writer() {
+ return f.read_from_writer_called;
+ }
/** Get the fragment table.
@return The address of the start of the fragment table,
or @c NULL if there is no fragment table.
@@ -505,6 +507,7 @@ struct CacheVC: public CacheVConnection
#ifdef HTTP_CACHE
unsigned int allow_empty_doc:1; // used for cache empty http document
#endif
+ unsigned int cluster:1;
} f;
};
// BTF optimization used to skip reading stuff in cache partition that doesn't contain any
@@ -1054,7 +1057,7 @@ struct Cache
Action *open_write(Continuation *cont, URL *url, CacheHTTPHdr *request,
CacheHTTPInfo *old_info, time_t pin_in_cache = (time_t) 0,
CacheFragType type = CACHE_FRAG_TYPE_HTTP);
- static void generate_key(INK_MD5 *md5, URL *url, CacheHTTPHdr *request);
+ static void generate_key(INK_MD5 *md5, URL *url, CacheHTTPHdr *request = 0);
#endif
Action *link(Continuation *cont, CacheKey *from, CacheKey *to, CacheFragType type, char *hostname, int host_len);
[2/6] refine the codes of cluster
Posted by we...@apache.org.
http://git-wip-us.apache.org/repos/asf/trafficserver/blob/62504a9f/iocore/cluster/message.cc
----------------------------------------------------------------------
diff --git a/iocore/cluster/message.cc b/iocore/cluster/message.cc
new file mode 100644
index 0000000..aa364f2
--- /dev/null
+++ b/iocore/cluster/message.cc
@@ -0,0 +1,229 @@
+/** @file
+
+ A brief file description
+
+ @section license License
+
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ */
+
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <sys/stat.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <sys/epoll.h>
+#include "Diags.h"
+#include "global.h"
+#include "machine.h"
+#include "nio.h"
+#include "clusterinterface.h"
+#include "session.h"
+#ifndef TS_INLINE
+#define TS_INLINE inline
+#endif
+#include "I_IOBuffer.h"
+#include "P_Cluster.h"
+#include "message.h"
+
+#ifndef USE_MULTI_ALLOCATOR
+Allocator out_message_allocator("OutMessage", sizeof(OutMessage), 1024); // single pool for OutMessage objects, used when sockets do not carry their own out_msg_allocator
+#endif
+
+/**
+ * Sum read_avail() over every block of a chain.
+ *
+ * @param blocks head of the chain; may be NULL.
+ * @return total readable bytes, 0 for an empty chain.
+ */
+inline int64_t get_total_size(IOBufferBlock *blocks) {
+  int64_t sum = 0;
+  IOBufferBlock *cur;
+
+  for (cur = blocks; cur != NULL; cur = cur->next) {
+    sum += cur->read_avail();
+  }
+  return sum;
+}
+
+/**
+ * Build an outgoing message for a session and push it onto the send queue
+ * of the session's connection.
+ *
+ * @param session  session the message belongs to.
+ * @param func_id  value stored in the message header's func_id.
+ * @param data     payload. When data_len is negative this is the head of an
+ *                 IOBufferBlock chain that the message references;
+ *                 otherwise it is a buffer of data_len bytes that is copied
+ *                 into the message's mini buffer.
+ * @param data_len payload size in bytes, or a negative value to mark data
+ *                 as a block chain. Must not exceed MINI_MESSAGE_SIZE.
+ * @param priority queueing priority passed to push_to_send_queue().
+ * @return 0 on success; the error from get_session_for_send(); ENOENT when
+ *         the session has no connection; EINVAL when data_len exceeds
+ *         MINI_MESSAGE_SIZE; an errno-style code when the message cannot be
+ *         allocated; or the error from push_to_send_queue().
+ */
+int cluster_send_message(ClusterSession session, const int func_id,
+    void *data, const int data_len, const MessagePriority priority)
+{
+  MachineSessions *pMachineSessions;
+  SessionEntry *pSessionEntry;
+  SocketContext *pSockContext;
+  OutMessage *pMessage;
+  int result;
+
+  if ((result=get_session_for_send(&session, &pMachineSessions,
+          &pSessionEntry)) != 0)
+  {
+    return result;
+  }
+
+  pSockContext = pSessionEntry->sock_context;
+  if (pSockContext == NULL) { //session closed
+    return ENOENT;
+  }
+
+#ifdef USE_MULTI_ALLOCATOR
+  pMessage = (OutMessage *)pSockContext->out_msg_allocator->alloc_void();
+#else
+  pMessage = (OutMessage *)out_message_allocator.alloc_void();
+#endif
+
+  if (pMessage == NULL) {
+    Error("file: "__FILE__", line: %d, "
+        "malloc %d bytes fail, errno: %d, error info: %s",
+        __LINE__, (int)sizeof(OutMessage), errno, strerror(errno));
+    return errno != 0 ? errno : ENOMEM;
+  }
+
+#ifdef MSG_TIME_STAT_FLAG
+  int session_index;
+  session_index = session.fields.seq % max_session_count_per_machine;
+  SESSION_LOCK(pMachineSessions, session_index);
+
+  if (session.fields.ip == my_machine_ip) { //request by me
+    if (pSessionEntry->client_start_time == 0) {
+      pSessionEntry->client_start_time = CURRENT_NS();
+    }
+  }
+
+  if (pSessionEntry->send_start_time == 0) {
+    pSessionEntry->send_start_time = CURRENT_NS();
+  }
+
+  SESSION_UNLOCK(pMachineSessions, session_index);
+#endif
+
+  do {
+#ifdef CHECK_MAGIC_NUMBER
+    pMessage->header.magic = MAGIC_NUMBER;
+#endif
+    pMessage->header.func_id = func_id;
+    pMessage->header.session_id = session;
+    pMessage->header.msg_seq = ink_atomic_increment(
+        &pSessionEntry->current_msg_seq, 1) + 1;
+    pMessage->in_queue_time = CURRENT_NS();
+    pMessage->bytes_sent = 0;
+    // The message comes from a raw allocator: clear the pointer member
+    // directly before anything is assigned through it.
+    pMessage->blocks.m_ptr = NULL;
+    pMessage->next = NULL;
+
+    if (data_len < 0) { //object
+      pMessage->data_type = DATA_TYPE_OBJECT;
+      pMessage->blocks = (IOBufferBlock *)data;
+      pMessage->header.data_len = get_total_size(pMessage->blocks);
+    }
+    else {
+      // Tag the message before validating, so that release_out_message()
+      // never inspects an uninitialised data_type on the error path.
+      pMessage->data_type = DATA_TYPE_BUFFER;
+      pMessage->blocks = NULL;
+
+      if (data_len > MINI_MESSAGE_SIZE) {
+        Error("file: "__FILE__", line: %d, "
+            "invalid data length: %d exceeds %d!",
+            __LINE__, data_len, MINI_MESSAGE_SIZE);
+        // An argument error: errno is unrelated (and possibly stale) here.
+        result = EINVAL;
+        break;
+      }
+
+      pMessage->header.data_len = data_len;
+      if (data_len > 0) { // data may legitimately be NULL for an empty payload
+        memcpy(pMessage->mini_buff, data, data_len);
+      }
+    }
+
+    pMessage->header.aligned_data_len = BYTE_ALIGN8(
+        pMessage->header.data_len);
+    result = push_to_send_queue(pSockContext,
+        pMessage, priority, pSessionEntry->version);
+  } while (0);
+
+  if (result != 0) {
+    release_out_message(pSockContext, pMessage);
+  }
+
+  return result;
+}
+
+/**
+ * Build an outgoing message for an explicitly given connection and queue
+ * it through a caller-supplied queueing function.
+ *
+ * Unlike cluster_send_message(), no session lookup is done and the header's
+ * msg_seq is set to the fixed value 11111.
+ *
+ * @param session            session id copied into the message header.
+ * @param pSockContext       connection to send on; NULL yields ENOENT.
+ * @param func_id            value stored in the message header's func_id.
+ * @param data               payload: the head of an IOBufferBlock chain
+ *                           when data_len is negative, otherwise a buffer
+ *                           of data_len bytes that is copied.
+ * @param data_len           payload size in bytes, or negative for a block
+ *                           chain. Must not exceed MINI_MESSAGE_SIZE.
+ * @param priority           priority passed to push_to_queue_func.
+ * @param push_to_queue_func function that enqueues the finished message.
+ * @return 0 on success; ENOENT for a NULL connection; EINVAL when data_len
+ *         exceeds MINI_MESSAGE_SIZE; an errno-style code when the message
+ *         cannot be allocated; or the error from push_to_queue_func.
+ */
+int cluster_send_msg_internal_ex(const ClusterSession *session,
+    SocketContext *pSockContext, const int func_id,
+    void *data, const int data_len, const MessagePriority priority,
+    push_to_send_queue_func push_to_queue_func)
+{
+  OutMessage *pMessage;
+  int result;
+
+  if (pSockContext == NULL) { //session closed
+    return ENOENT;
+  }
+
+#ifdef USE_MULTI_ALLOCATOR
+  pMessage = (OutMessage *)pSockContext->out_msg_allocator->alloc_void();
+#else
+  pMessage = (OutMessage *)out_message_allocator.alloc_void();
+#endif
+
+  if (pMessage == NULL) {
+    Error("file: "__FILE__", line: %d, "
+        "malloc %d bytes fail, errno: %d, error info: %s",
+        __LINE__, (int)sizeof(OutMessage), errno, strerror(errno));
+    return errno != 0 ? errno : ENOMEM;
+  }
+
+  do {
+#ifdef CHECK_MAGIC_NUMBER
+    pMessage->header.magic = MAGIC_NUMBER;
+#endif
+    pMessage->header.func_id = func_id;
+    pMessage->header.session_id = *session;
+    pMessage->header.msg_seq = 11111; // NOTE(review): fixed placeholder sequence; meaning not visible here
+    pMessage->in_queue_time = CURRENT_NS();
+    pMessage->bytes_sent = 0;
+    // The message comes from a raw allocator: clear the pointer member
+    // directly before anything is assigned through it.
+    pMessage->blocks.m_ptr = NULL;
+    pMessage->next = NULL;
+
+    if (data_len < 0) { //object
+      pMessage->data_type = DATA_TYPE_OBJECT;
+      pMessage->blocks = (IOBufferBlock *)data;
+      pMessage->header.data_len = get_total_size(pMessage->blocks);
+    }
+    else {
+      // Tag the message before validating, so that release_out_message()
+      // never inspects an uninitialised data_type on the error path.
+      pMessage->data_type = DATA_TYPE_BUFFER;
+      pMessage->blocks = NULL;
+
+      if (data_len > MINI_MESSAGE_SIZE) {
+        Error("file: "__FILE__", line: %d, "
+            "invalid data length: %d exceeds %d!",
+            __LINE__, data_len, MINI_MESSAGE_SIZE);
+        // An argument error: errno is unrelated (and possibly stale) here.
+        result = EINVAL;
+        break;
+      }
+
+      pMessage->header.data_len = data_len;
+      if (data_len > 0) {
+        memcpy(pMessage->mini_buff, data, data_len);
+      }
+    }
+
+    pMessage->header.aligned_data_len = BYTE_ALIGN8(
+        pMessage->header.data_len);
+    result = push_to_queue_func(pSockContext, pMessage, priority);
+  } while (0);
+
+  if (result != 0) {
+    release_out_message(pSockContext, pMessage);
+  }
+
+  return result;
+}
+
http://git-wip-us.apache.org/repos/asf/trafficserver/blob/62504a9f/iocore/cluster/message.h
----------------------------------------------------------------------
diff --git a/iocore/cluster/message.h b/iocore/cluster/message.h
new file mode 100644
index 0000000..d948927
--- /dev/null
+++ b/iocore/cluster/message.h
@@ -0,0 +1,75 @@
+/** @file
+
+ A brief file description
+
+ @section license License
+
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ */
+
+#ifndef _MESSAGE_H_
+#define _MESSAGE_H_
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "types.h"
+
+struct HelloMessage // version information; presumably exchanged when two nodes connect -- confirm against the connection code
+{
+ uint32_t major; // major version
+ uint32_t minor; // minor version
+ uint32_t min_major; // presumably the lowest major version still accepted -- confirm
+ uint32_t min_minor; // presumably the lowest minor version still accepted -- confirm
+};
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifndef USE_MULTI_ALLOCATOR
+ extern Allocator out_message_allocator; // defined in message.cc
+#endif
+
+typedef int (*push_to_send_queue_func)(SocketContext *pSockContext, OutMessage *pMessage, // enqueue callback; a non-zero return is treated as failure by cluster_send_msg_internal_ex()
+ const MessagePriority priority);
+
+int cluster_send_msg_internal_ex(const ClusterSession *session, // builds an OutMessage and hands it to push_to_queue_func
+ SocketContext *pSockContext, const int func_id,
+ void *data, const int data_len, const MessagePriority priority, // a negative data_len marks data as an IOBufferBlock chain
+ push_to_send_queue_func push_to_queue_func);
+
+/**
+ * Return an OutMessage to the allocator it came from.
+ *
+ * For object messages the block reference is reset to NULL first. The
+ * per-socket allocator is used when USE_MULTI_ALLOCATOR is defined,
+ * otherwise the global out_message_allocator (pSockContext is then unused).
+ */
+inline void release_out_message(SocketContext *pSockContext,
+    OutMessage *msg)
+{
+  if (msg->data_type == DATA_TYPE_OBJECT) {
+    if (msg->blocks != NULL) {
+      msg->blocks = NULL;
+    }
+  }
+
+#ifdef USE_MULTI_ALLOCATOR
+  pSockContext->out_msg_allocator->free_void(msg);
+#else
+  (void)pSockContext;
+  out_message_allocator.free_void(msg);
+#endif
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
+
http://git-wip-us.apache.org/repos/asf/trafficserver/blob/62504a9f/iocore/cluster/nio.cc
----------------------------------------------------------------------
diff --git a/iocore/cluster/nio.cc b/iocore/cluster/nio.cc
new file mode 100644
index 0000000..b699d84
--- /dev/null
+++ b/iocore/cluster/nio.cc
@@ -0,0 +1,1701 @@
+/** @file
+
+ A brief file description
+
+ @section license License
+
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ */
+
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <sys/stat.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <fcntl.h>
+#if defined(linux)
+#include <sys/prctl.h>
+#endif
+#include "Diags.h"
+#include "global.h"
+#include "machine.h"
+#include "session.h"
+#include "message.h"
+#include "connection.h"
+#ifndef TS_INLINE
+#define TS_INLINE inline
+#endif
+#include "I_IOBuffer.h"
+#include "I_EventSystem.h"
+#include "P_Cluster.h"
+#include "P_RecCore.h"
+#include "ink_config.h"
+#include "EventPoll.h"
+#include "nio.h"
+
+// Number of worker threads successfully started by nio_init(); incremented
+// there under worker_thread_lock.
+int cluster_worker_thread_count = 0;
+// Array of num_of_cluster_threads per-thread contexts, allocated and zeroed
+// in nio_init().
+WorkerThreadContext *cluster_worker_thread_contexts = NULL;
+// Size passed to INIT_READER for each receive buffer; defaults to 2 MB and is
+// bound to proxy.config.cluster.read_buffer_size in nio_init().
+static int read_buffer_size = 2 * 1024 * 1024;
+
+// Guards cluster_worker_thread_count.
+static ink_mutex worker_thread_lock;
+
+// Forward declarations for functions defined later in this file.
+static void *work_thread_entrance(void* arg);
+static void clear_send_queue(SocketContext * pSockContext, const bool warning);
+
+// Callbacks installed by cluster_global_init().
+message_deal_func cluster_msg_deal_func = NULL;
+machine_change_notify_func cluster_machine_change_notify = NULL;
+
+// Record handles returned by RecRegisterStat() in init_nio_stats() and written
+// directly (via RecDataSetFromInk64) by log_nio_stats().
+// The field order must stay in sync with the positional initializer of
+// nio_records below.
+struct NIORecords {
+ RecRecord * send_retry_count;
+
+ RecRecord * send_wait_time;
+ RecRecord * epoll_wait_count;
+ RecRecord * epoll_wait_time_used;
+ RecRecord * loop_usleep_count;
+ RecRecord * loop_usleep_time;
+ RecRecord * io_loop_interval;
+
+#ifdef DEBUG
+ // Only registered and published in DEBUG builds.
+ RecRecord * max_write_loop_time_used;
+ RecRecord * max_read_loop_time_used;
+ RecRecord * max_epoll_time_used;
+ RecRecord * max_usleep_time_used;
+ RecRecord * max_callback_time_used;
+#endif
+};
+
+// All handles start out NULL: seven always-present fields plus five more in
+// DEBUG builds.
+static NIORecords nio_records = {NULL, NULL, NULL, NULL, NULL, NULL, NULL
+#ifdef DEBUG
+ , NULL, NULL, NULL, NULL, NULL
+#endif
+};
+
+// Delay added to CURRENT_NS() to compute a socket's next write time;
+// recomputed by log_nio_stats() from the measured send rate.
+static int send_wait_time = 1 * HRTIME_MSECOND; // write wait time, calculated by the cluster I/O code
+// Recomputed by log_nio_stats() between cluster_min_loop_interval and
+// cluster_max_loop_interval.
+static int io_loop_interval = 0; // microseconds
+
+#ifdef DEBUG
+// Maxima published by log_nio_stats(); the code that updates them is not
+// visible in this part of the file.
+static volatile int64_t max_write_loop_time_used = 0;
+static volatile int64_t max_read_loop_time_used = 0;
+static volatile int64_t max_epoll_time_used = 0;
+static volatile int64_t max_usleep_time_used = 0;
+static volatile int64_t max_callback_time_used = 0;
+#endif
+
+/**
+ * Fill iovec[] with the readable region of each block in a chain.
+ *
+ * Walks the chain from blocks, skips blocks whose read_avail() is not
+ * positive, and stops at the end of the chain or once size entries have been
+ * written.
+ *
+ * @param blocks head of the block chain (may be NULL)
+ * @param iovec  output array with room for at least size entries
+ * @param size   maximum number of entries to fill
+ * @return number of entries written to iovec
+ */
+inline int get_iovec(IOBufferBlock *blocks, IOVec *iovec, int size) {
+  int filled = 0;
+
+  for (IOBufferBlock *blk = blocks; blk != NULL && filled < size; blk = blk->next) {
+    const int64_t avail = blk->read_avail();
+    if (avail <= 0) {
+      continue;  // nothing readable in this block
+    }
+    iovec[filled].iov_len = avail;
+    iovec[filled].iov_base = blk->_start;
+    filled++;
+  }
+
+  return filled;
+}
+
+/**
+ * Advance a message's block chain past l bytes that have been sent.
+ *
+ * Blocks whose readable size is less than or equal to the remaining count are
+ * dropped from the head of pMessage->blocks; the first block holding more than
+ * the remaining count is partially consumed and the walk stops there.
+ *
+ * @param pMessage message whose blocks chain is advanced
+ * @param l        number of bytes to consume
+ */
+inline void consume(OutMessage *pMessage, int64_t l) {
+  while (pMessage->blocks != NULL) {
+    const int64_t avail = pMessage->blocks->read_avail();
+    if (l >= avail) {
+      // whole block is used up: unlink it and keep going
+      l -= avail;
+      pMessage->blocks = pMessage->blocks->next;
+      continue;
+    }
+    pMessage->blocks->consume(l);
+    return;
+  }
+}
+
+/**
+ * Register the cluster I/O statistics records.
+ *
+ * Plain counters are registered by name only (log_nio_stats() updates them
+ * through RecSetRecord); for the records kept in nio_records the RecRecord
+ * handle returned by RecRegisterStat() is stored. Registration order matches
+ * the original sequence.
+ */
+static void init_nio_stats()
+{
+  static const char *const io_counter_names[] = {
+    "proxy.process.cluster.io.send_msg_count",
+    "proxy.process.cluster.io.drop_msg_count",
+    "proxy.process.cluster.io.send_bytes",
+    "proxy.process.cluster.io.drop_bytes",
+    "proxy.process.cluster.io.recv_msg_count",
+    "proxy.process.cluster.io.recv_bytes",
+    "proxy.process.cluster.io.enqueue_in_msg_count",
+    "proxy.process.cluster.io.enqueue_in_msg_bytes",
+    "proxy.process.cluster.io.dequeue_in_msg_count",
+    "proxy.process.cluster.io.dequeue_in_msg_bytes",
+    "proxy.process.cluster.io.call_writev_count",
+    "proxy.process.cluster.io.call_read_count"
+  };
+  static const char *const misc_counter_names[] = {
+    "proxy.process.cluster.ping_total_count",
+    "proxy.process.cluster.ping_success_count",
+    "proxy.process.cluster.ping_time_used",
+    "proxy.process.cluster.io.send_delayed_time",
+    "proxy.process.cluster.io.push_msg_count",
+    "proxy.process.cluster.io.push_msg_bytes",
+    "proxy.process.cluster.io.fail_msg_count",
+    "proxy.process.cluster.io.fail_msg_bytes"
+  };
+
+// Register one RECD_INT stat and keep its handle in nio_records.
+#define NIO_REGISTER_HANDLE(field, rec_name) \
+  nio_records.field = RecRegisterStat(RECT_PROCESS, rec_name, \
+      RECD_INT, zero_value, RECP_NON_PERSISTENT)
+
+  RecData zero_value;
+  size_t idx;
+
+  memset(&zero_value, 0, sizeof(zero_value));
+
+  for (idx = 0; idx < sizeof(io_counter_names) / sizeof(io_counter_names[0]); idx++) {
+    RecRegisterStatInt(RECT_PROCESS, io_counter_names[idx], 0, RECP_NON_PERSISTENT);
+  }
+
+  NIO_REGISTER_HANDLE(send_retry_count, "proxy.process.cluster.io.send_retry_count");
+  NIO_REGISTER_HANDLE(epoll_wait_count, "proxy.process.cluster.io.epoll_wait_count");
+  NIO_REGISTER_HANDLE(epoll_wait_time_used, "proxy.process.cluster.io.epoll_wait_time_used");
+  NIO_REGISTER_HANDLE(loop_usleep_count, "proxy.process.cluster.io.loop_usleep_count");
+  NIO_REGISTER_HANDLE(loop_usleep_time, "proxy.process.cluster.io.loop_usleep_time");
+  NIO_REGISTER_HANDLE(send_wait_time, "proxy.process.cluster.io.send_wait_time");
+  NIO_REGISTER_HANDLE(io_loop_interval, "proxy.process.cluster.io.loop_interval");
+
+  for (idx = 0; idx < sizeof(misc_counter_names) / sizeof(misc_counter_names[0]); idx++) {
+    RecRegisterStatInt(RECT_PROCESS, misc_counter_names[idx], 0, RECP_NON_PERSISTENT);
+  }
+
+#ifdef DEBUG
+  NIO_REGISTER_HANDLE(max_write_loop_time_used, "proxy.process.cluster.io.max_write_loop_time_used");
+  NIO_REGISTER_HANDLE(max_read_loop_time_used, "proxy.process.cluster.io.max_read_loop_time_used");
+  NIO_REGISTER_HANDLE(max_epoll_time_used, "proxy.process.cluster.io.max_epoll_time_used");
+  NIO_REGISTER_HANDLE(max_usleep_time_used, "proxy.process.cluster.io.max_usleep_time_used");
+  NIO_REGISTER_HANDLE(max_callback_time_used, "proxy.process.cluster.io.max_callback_time_used");
+#endif
+
+#undef NIO_REGISTER_HANDLE
+}
+
+/**
+ * Aggregate the per-thread socket statistics, publish them to the records
+ * system, and re-derive the send pacing parameters from the measured rate.
+ *
+ * Steps:
+ *  1. sum the stats of all num_of_cluster_threads worker contexts;
+ *  2. publish the totals (by name through RecSetRecord, or directly through
+ *     the handles in nio_records);
+ *  3. if at least one second (per CURRENT_TIME()) has passed since the last
+ *     rate calculation, compute the current send rate in bits per second and
+ *     update send_wait_time / io_loop_interval accordingly.
+ */
+void log_nio_stats()
+{
+// Add one field of the current thread's stats to the running totals.
+#define NIO_SUM_FIELD(field) totals.field += ctx->stats.field
+// Publish one total under the given record name.
+#define NIO_PUBLISH(rec_name, field) \
+  do { \
+    rec_value.rec_int = totals.field; \
+    RecSetRecord(RECT_PROCESS, rec_name, RECD_INT, &rec_value, NULL); \
+  } while (0)
+
+  RecData rec_value;
+  WorkerThreadContext *ctx;
+  WorkerThreadContext *ctx_end;
+  SocketStats totals = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+  static time_t last_calc_bps_time = CURRENT_TIME();
+  static int64_t last_send_bytes = 0;
+
+  ctx_end = cluster_worker_thread_contexts + num_of_cluster_threads;
+  for (ctx = cluster_worker_thread_contexts; ctx < ctx_end; ctx++) {
+    NIO_SUM_FIELD(send_msg_count);
+    NIO_SUM_FIELD(drop_msg_count);
+    NIO_SUM_FIELD(send_bytes);
+    NIO_SUM_FIELD(drop_bytes);
+    NIO_SUM_FIELD(call_writev_count);
+    NIO_SUM_FIELD(send_retry_count);
+    NIO_SUM_FIELD(recv_msg_count);
+    NIO_SUM_FIELD(recv_bytes);
+    NIO_SUM_FIELD(enqueue_in_msg_count);
+    NIO_SUM_FIELD(enqueue_in_msg_bytes);
+    NIO_SUM_FIELD(dequeue_in_msg_count);
+    NIO_SUM_FIELD(dequeue_in_msg_bytes);
+    NIO_SUM_FIELD(call_read_count);
+    NIO_SUM_FIELD(epoll_wait_count);
+    NIO_SUM_FIELD(epoll_wait_time_used);
+    NIO_SUM_FIELD(loop_usleep_count);
+    NIO_SUM_FIELD(loop_usleep_time);
+    NIO_SUM_FIELD(ping_total_count);
+    NIO_SUM_FIELD(ping_success_count);
+    NIO_SUM_FIELD(ping_time_used);
+    NIO_SUM_FIELD(send_delayed_time);
+    NIO_SUM_FIELD(push_msg_count);
+    NIO_SUM_FIELD(push_msg_bytes);
+    NIO_SUM_FIELD(fail_msg_count);
+    NIO_SUM_FIELD(fail_msg_bytes);
+  }
+
+  NIO_PUBLISH("proxy.process.cluster.io.send_msg_count", send_msg_count);
+  NIO_PUBLISH("proxy.process.cluster.io.drop_msg_count", drop_msg_count);
+  NIO_PUBLISH("proxy.process.cluster.io.send_bytes", send_bytes);
+  NIO_PUBLISH("proxy.process.cluster.io.drop_bytes", drop_bytes);
+  NIO_PUBLISH("proxy.process.cluster.io.recv_msg_count", recv_msg_count);
+  NIO_PUBLISH("proxy.process.cluster.io.recv_bytes", recv_bytes);
+  NIO_PUBLISH("proxy.process.cluster.io.enqueue_in_msg_count", enqueue_in_msg_count);
+  NIO_PUBLISH("proxy.process.cluster.io.enqueue_in_msg_bytes", enqueue_in_msg_bytes);
+  NIO_PUBLISH("proxy.process.cluster.io.dequeue_in_msg_count", dequeue_in_msg_count);
+  NIO_PUBLISH("proxy.process.cluster.io.dequeue_in_msg_bytes", dequeue_in_msg_bytes);
+  NIO_PUBLISH("proxy.process.cluster.ping_total_count", ping_total_count);
+  NIO_PUBLISH("proxy.process.cluster.ping_success_count", ping_success_count);
+  NIO_PUBLISH("proxy.process.cluster.ping_time_used", ping_time_used);
+  NIO_PUBLISH("proxy.process.cluster.io.send_delayed_time", send_delayed_time);
+  NIO_PUBLISH("proxy.process.cluster.io.push_msg_count", push_msg_count);
+  NIO_PUBLISH("proxy.process.cluster.io.push_msg_bytes", push_msg_bytes);
+  NIO_PUBLISH("proxy.process.cluster.io.fail_msg_count", fail_msg_count);
+  NIO_PUBLISH("proxy.process.cluster.io.fail_msg_bytes", fail_msg_bytes);
+  NIO_PUBLISH("proxy.process.cluster.io.call_writev_count", call_writev_count);
+  NIO_PUBLISH("proxy.process.cluster.io.call_read_count", call_read_count);
+
+  // These records are written through the handles saved at registration time.
+  RecDataSetFromInk64(RECD_INT, &nio_records.send_retry_count->data,
+      totals.send_retry_count);
+  RecDataSetFromInk64(RECD_INT, &nio_records.epoll_wait_count->data,
+      totals.epoll_wait_count);
+  RecDataSetFromInk64(RECD_INT, &nio_records.epoll_wait_time_used->data,
+      totals.epoll_wait_time_used);
+  RecDataSetFromInk64(RECD_INT, &nio_records.loop_usleep_count->data,
+      totals.loop_usleep_count);
+  RecDataSetFromInk64(RECD_INT, &nio_records.loop_usleep_time->data,
+      totals.loop_usleep_time);
+
+#ifdef DEBUG
+  RecDataSetFromInk64(RECD_INT, &nio_records.max_write_loop_time_used->data,
+      max_write_loop_time_used);
+  RecDataSetFromInk64(RECD_INT, &nio_records.max_read_loop_time_used->data,
+      max_read_loop_time_used);
+  RecDataSetFromInk64(RECD_INT, &nio_records.max_epoll_time_used->data,
+      max_epoll_time_used);
+  RecDataSetFromInk64(RECD_INT, &nio_records.max_usleep_time_used->data,
+      max_usleep_time_used);
+  RecDataSetFromInk64(RECD_INT, &nio_records.max_callback_time_used->data,
+      max_callback_time_used);
+#endif
+
+#undef NIO_PUBLISH
+#undef NIO_SUM_FIELD
+
+  int time_pass = CURRENT_TIME() - last_calc_bps_time;
+  if (time_pass <= 0) {
+    return;  // no measurable interval yet, keep the current pacing
+  }
+
+  int64_t nio_current_bps = 8 * (totals.send_bytes - last_send_bytes) / time_pass;
+  last_calc_bps_time = CURRENT_TIME();
+  last_send_bytes = totals.send_bytes;
+
+  if (cluster_flow_ctrl_max_bps <= 0) {
+    // Flow control disabled: always use the minimum wait and interval.
+    // NOTE(review): the send_wait_time / loop_interval records are not
+    // refreshed on this path -- confirm whether that is intended.
+    send_wait_time = cluster_send_min_wait_time * HRTIME_USECOND;
+    io_loop_interval = cluster_min_loop_interval;
+    return;
+  }
+
+  if (nio_current_bps < cluster_flow_ctrl_min_bps) {
+    send_wait_time = cluster_send_min_wait_time * HRTIME_USECOND;
+    io_loop_interval = cluster_min_loop_interval;
+  }
+  else {
+    // Scale linearly between the min and max settings by how close the
+    // current rate is to the configured maximum (capped at 1.0).
+    double io_busy_ratio = (double)nio_current_bps / (double)cluster_flow_ctrl_max_bps;
+    if (io_busy_ratio > 1.0) {
+      io_busy_ratio = 1.0;
+    }
+    send_wait_time = (int)((cluster_send_min_wait_time +
+          (cluster_send_max_wait_time - cluster_send_min_wait_time) *
+          io_busy_ratio)) * HRTIME_USECOND;
+    io_loop_interval = cluster_min_loop_interval + (int)((
+          cluster_max_loop_interval - cluster_min_loop_interval) * io_busy_ratio);
+  }
+
+  RecDataSetFromInk64(RECD_INT, &nio_records.send_wait_time->data,
+      send_wait_time / HRTIME_USECOND);
+  RecDataSetFromInk64(RECD_INT, &nio_records.io_loop_interval->data,
+      io_loop_interval);
+}
+
+/**
+ * Initialise the cluster network I/O layer.
+ *
+ * Binds read_buffer_size to its configuration record, allocates one
+ * WorkerThreadContext per configured cluster thread (each with its own
+ * EventPoll, active-socket table and lock), starts the worker threads and
+ * finally registers the I/O statistics.
+ *
+ * Changes compared with the previous version:
+ *  - errno is captured before logging, so the returned code cannot be
+ *    clobbered by the logging call;
+ *  - a failure to start a worker thread is returned to the caller instead of
+ *    being logged and then reported as success;
+ *  - a non-positive num_of_cluster_threads is rejected instead of dividing
+ *    by zero.
+ *
+ * @return 0 on success, otherwise an errno-style error code
+ */
+int nio_init()
+{
+  int result;
+  int saved_errno;
+  int bytes;
+  int total_connections;
+  int max_connections_per_thread;
+  WorkerThreadContext *pThreadContext;
+  WorkerThreadContext *pContextEnd;
+
+  REC_EstablishStaticConfigInt32(read_buffer_size, "proxy.config.cluster.read_buffer_size");
+  Debug(CLUSTER_DEBUG_TAG, "file: " __FILE__ ", line: %d, "
+      "read_buffer_size: %d", __LINE__, read_buffer_size);
+
+  if ((result=ink_mutex_init(&worker_thread_lock, "worker_lock")) != 0) {
+    return result;
+  }
+
+  if (num_of_cluster_threads <= 0) {
+    Error("file: " __FILE__ ", line: %d, "
+        "invalid cluster thread count: %d",
+        __LINE__, (int)num_of_cluster_threads);
+    return EINVAL;
+  }
+
+  bytes = sizeof(WorkerThreadContext) * num_of_cluster_threads;
+  cluster_worker_thread_contexts = (WorkerThreadContext *)malloc(bytes);
+  if (cluster_worker_thread_contexts == NULL) {
+    saved_errno = errno;
+    Error("file: " __FILE__ ", line: %d, "
+        "malloc %d bytes fail, errno: %d, error info: %s",
+        __LINE__, bytes, saved_errno, strerror(saved_errno));
+    return saved_errno != 0 ? saved_errno : ENOMEM;
+  }
+  memset(cluster_worker_thread_contexts, 0, bytes);
+
+  // Spread the maximum possible number of connections evenly over the
+  // threads, rounding up.
+  total_connections = num_of_cluster_connections * (MAX_MACHINE_COUNT - 1);
+  max_connections_per_thread = total_connections / num_of_cluster_threads;
+  if (total_connections % num_of_cluster_threads != 0) {
+    max_connections_per_thread++;
+  }
+
+  cluster_worker_thread_count = 0;
+  pContextEnd = cluster_worker_thread_contexts + num_of_cluster_threads;
+  for (pThreadContext=cluster_worker_thread_contexts; pThreadContext<pContextEnd; pThreadContext++)
+  {
+    pThreadContext->thread_index = (int)(pThreadContext - cluster_worker_thread_contexts);
+    pThreadContext->alloc_size = max_connections_per_thread;
+
+    pThreadContext->ev_poll = new EventPoll(pThreadContext->alloc_size, 1);
+    if (pThreadContext->ev_poll == NULL)
+    {
+      saved_errno = errno;
+      Error("file: " __FILE__ ", line: %d, "
+          "new EventPoll fail, errno: %d, error info: %s",
+          __LINE__, saved_errno, strerror(saved_errno));
+      return saved_errno != 0 ? saved_errno : ENOMEM;
+    }
+
+    bytes = sizeof(SocketContext *) * pThreadContext->alloc_size;
+    pThreadContext->active_sockets = (SocketContext **)malloc(bytes);
+    if (pThreadContext->active_sockets == NULL)
+    {
+      saved_errno = errno;
+      Error("file: " __FILE__ ", line: %d, "
+          "malloc %d bytes fail, errno: %d, error info: %s",
+          __LINE__, bytes, saved_errno, strerror(saved_errno));
+      return saved_errno != 0 ? saved_errno : ENOMEM;
+    }
+
+    if ((result=ink_mutex_init(&pThreadContext->lock, "context_lock")) != 0)
+    {
+      return result;
+    }
+
+    if (ink_thread_create(work_thread_entrance, pThreadContext) == 0)
+    {
+      saved_errno = errno != 0 ? errno : ENOMEM;
+      Error("file: " __FILE__ ", line: %d, "
+          "create thread failed, startup threads: %d, "
+          "errno: %d, error info: %s",
+          __LINE__, cluster_worker_thread_count,
+          saved_errno, strerror(saved_errno));
+      // Report the failure: contexts without a running thread would never
+      // be serviced.
+      return saved_errno;
+    }
+
+    if ((result=ink_mutex_acquire(&worker_thread_lock)) != 0) {
+      Error("file: " __FILE__ ", line: %d, "
+          "call ink_mutex_acquire fail, "
+          "errno: %d, error info: %s",
+          __LINE__, result, strerror(result));
+    }
+    cluster_worker_thread_count++;
+    if ((result=ink_mutex_release(&worker_thread_lock)) != 0) {
+      Error("file: " __FILE__ ", line: %d, "
+          "call ink_mutex_release fail, "
+          "errno: %d, error info: %s",
+          __LINE__, result, strerror(result));
+    }
+  }
+
+  init_nio_stats();
+
+  return 0;
+}
+
+/**
+ * Tear down the state owned by this module: destroys worker_thread_lock.
+ *
+ * @return always 0
+ */
+int nio_destroy()
+{
+  ink_mutex *lock = &worker_thread_lock;
+
+  ink_mutex_destroy(lock);
+  return 0;
+}
+
+/**
+ * Install the cluster-wide callbacks.
+ *
+ * @param deal_func             stored in cluster_msg_deal_func
+ * @param machine_change_notify stored in cluster_machine_change_notify
+ * @return always 0
+ */
+int cluster_global_init(message_deal_func deal_func,
+    machine_change_notify_func machine_change_notify)
+{
+  cluster_machine_change_notify = machine_change_notify;
+  cluster_msg_deal_func = deal_func;
+
+  return 0;
+}
+
+// Reader-buffer helper macros. Every macro argument is parenthesized so that
+// expressions such as `ctx->reader` or `a + b` expand safely. Note that `len`
+// is still evaluated twice; pass an expression without side effects.
+
+// Give `reader` a fresh receive buffer of `len` bytes and point current at its
+// start and buff_end just past its end. msg_header is left untouched.
+#define ALLOC_READER_BUFFER(reader, len) \
+  do { \
+    (reader).buffer = new_RecvBuffer(len); \
+    (reader).current = (reader).buffer->_data; \
+    (reader).buff_end = (reader).buffer->_data + (len); \
+  } while (0)
+
+// Same as ALLOC_READER_BUFFER, but also resets msg_header to the start of the
+// new buffer.
+#define INIT_READER(reader, len) \
+  do { \
+    (reader).buffer = new_RecvBuffer(len); \
+    (reader).current = (reader).msg_header = (reader).buffer->_data; \
+    (reader).buff_end = (reader).msg_header + (len); \
+  } while (0)
+
+// Replace the reader's buffer with a new one of read_buffer_size bytes and
+// copy the first `msg_bytes` bytes of the message in progress (starting at the
+// old msg_header) to its start. The old buffer is kept referenced through
+// oldBuffer until the copy is done.
+#define MOVE_TO_NEW_BUFFER(pSockContext, msg_bytes) \
+  do { \
+    Ptr<IOBufferData> oldBuffer; \
+    char *old_msg_header; \
+    oldBuffer = (pSockContext)->reader.buffer; \
+    old_msg_header = (pSockContext)->reader.msg_header; \
+    INIT_READER((pSockContext)->reader, read_buffer_size); \
+    memcpy((pSockContext)->reader.current, old_msg_header, (msg_bytes)); \
+    (pSockContext)->reader.current += (msg_bytes); \
+    oldBuffer = NULL; \
+  } while (0)
+
+
+
+// Apply one SOL_SOCKET buffer-size option to sock.
+// Returns 0 on success or an errno-style code; errno is captured before
+// logging so the logging call cannot change the value that is returned.
+static int set_socket_buff_option(int sock, int optname, int bytes)
+{
+  if (setsockopt(sock, SOL_SOCKET, optname,
+        (char *)&bytes, sizeof(int)) < 0)
+  {
+    const int saved_errno = errno;
+    Error("file: " __FILE__ ", line: %d, "
+        "setsockopt failed, errno: %d, error info: %s",
+        __LINE__, saved_errno, strerror(saved_errno));
+    return saved_errno != 0 ? saved_errno : ENOMEM;
+  }
+
+  return 0;
+}
+
+/**
+ * Apply the configured send / receive buffer sizes to a socket.
+ *
+ * A size that is not positive (cluster_send_buffer_size or
+ * cluster_receive_buffer_size) leaves the corresponding option untouched.
+ *
+ * @param sock socket descriptor
+ * @return 0 on success, otherwise an errno-style code from the first
+ *         setsockopt() call that failed
+ */
+static int set_socket_rw_buff_size(int sock)
+{
+  int result;
+
+  if (cluster_send_buffer_size > 0) {
+    result = set_socket_buff_option(sock, SO_SNDBUF, cluster_send_buffer_size);
+    if (result != 0) {
+      return result;
+    }
+  }
+
+  if (cluster_receive_buffer_size > 0) {
+    result = set_socket_buff_option(sock, SO_RCVBUF, cluster_receive_buffer_size);
+    if (result != 0) {
+      return result;
+    }
+  }
+
+  return 0;
+}
+
+/**
+ * Append a socket context to its worker thread's active-socket table.
+ *
+ * The table holds alloc_size entries (allocated in nio_init()), so the
+ * insertion is refused when it is already full instead of writing past the
+ * end of the array.
+ *
+ * @param pSockContext socket context to register
+ * @return 0 on success, ENOSPC when the table is full
+ */
+static int add_to_active_sockets(SocketContext *pSockContext)
+{
+  WorkerThreadContext *ctx = pSockContext->thread_context;
+  int result = 0;
+
+  ink_mutex_acquire(&ctx->lock);
+  if (ctx->active_sock_count >= ctx->alloc_size) {
+    result = ENOSPC;
+  }
+  else {
+    ctx->active_sockets[ctx->active_sock_count] = pSockContext;
+    ctx->active_sock_count++;
+  }
+  ink_mutex_release(&ctx->lock);
+
+  if (result != 0) {
+    Error("file: " __FILE__ ", line: %d, "
+        "active socket table of thread #%d is full, capacity: %d",
+        __LINE__, (int)ctx->thread_index, (int)ctx->alloc_size);
+  }
+
+  return result;
+}
+
+/**
+ * Remove a socket context from its worker thread's active-socket table.
+ *
+ * The first matching entry is removed and the entries after it are shifted
+ * down by one so the table stays dense. Runs under the thread context's lock.
+ *
+ * @param pSockContext socket context to remove
+ * @return 0 on success, ENOENT when the context is not in the table
+ */
+static int remove_from_active_sockets(SocketContext *pSockContext)
+{
+  WorkerThreadContext *ctx = pSockContext->thread_context;
+  int result;
+  int total;
+  int pos;
+
+  ink_mutex_acquire(&ctx->lock);
+
+  total = ctx->active_sock_count;
+  pos = 0;
+  while (pos < total && ctx->active_sockets[pos] != pSockContext) {
+    pos++;
+  }
+
+  if (pos < total) {
+    // close the gap left by the removed entry
+    for (; pos + 1 < total; pos++) {
+      ctx->active_sockets[pos] = ctx->active_sockets[pos + 1];
+    }
+    ctx->active_sock_count--;
+    result = 0;
+  }
+  else {
+    Error("file: "__FILE__", line: %d, "
+        "socket context for %s not found!", __LINE__,
+        pSockContext->machine->hostname);
+    result = ENOENT;
+  }
+
+  ink_mutex_release(&ctx->lock);
+
+  return result;
+}
+
+/**
+ * Register a freshly connected socket with its worker thread.
+ *
+ * Resets the per-connection state (send queues, ping bookkeeping, reader
+ * buffer), applies the configured socket buffer sizes, records the connection
+ * on its machine, attaches the socket to the thread's event poll for read
+ * events and finally adds it to the thread's active-socket table.
+ *
+ * Changes compared with the previous version:
+ *  - errno from a failed attach is captured immediately, so neither the
+ *    logging call nor the rollback can change the code that is returned;
+ *  - if add_to_active_sockets() reports a failure, the attach and the machine
+ *    registration are rolled back, leaving the same state as the
+ *    attach-failure path.
+ *
+ * @param pSockContext socket context with sock, machine and thread_context set
+ * @return 0 on success, otherwise an errno-style error code
+ */
+int nio_add_to_epoll(SocketContext *pSockContext)
+{
+  int result;
+
+  pSockContext->connected_time = CURRENT_TIME();
+  clear_send_queue(pSockContext, true);
+
+  pSockContext->queue_index = 0;
+  pSockContext->ping_start_time = 0;
+  pSockContext->ping_fail_count = 0;
+  pSockContext->next_write_time = CURRENT_NS() + send_wait_time;
+  pSockContext->next_ping_time = CURRENT_NS() + cluster_ping_send_interval;
+
+  INIT_READER(pSockContext->reader, read_buffer_size);
+  pSockContext->reader.recv_body_bytes = 0;
+
+  // Best effort: the return value was never checked here, so a failure to
+  // resize the buffers still does not abort the registration.
+  set_socket_rw_buff_size(pSockContext->sock);
+  init_machine_sessions(pSockContext->machine, false);
+  add_machine_sock_context(pSockContext);
+
+  if (pSockContext->thread_context->ev_poll->attach(pSockContext->sock,
+        EVENTIO_READ, pSockContext) < 0)
+  {
+    const int saved_errno = errno;
+    Error("file: " __FILE__ ", line: %d, "
+        "event poll attach fail, errno: %d, error info: %s",
+        __LINE__, saved_errno, strerror(saved_errno));
+    remove_machine_sock_context(pSockContext); //rollback
+    return saved_errno != 0 ? saved_errno : ENOMEM;
+  }
+
+  result = add_to_active_sockets(pSockContext);
+  if (result != 0) {
+    // undo the attach and the machine registration done above
+    if (pSockContext->thread_context->ev_poll->detach(pSockContext->sock) < 0) {
+      const int detach_errno = errno;
+      Error("file: " __FILE__ ", line: %d, "
+          "event poll detach fail, errno: %d, error info: %s",
+          __LINE__, detach_errno, strerror(detach_errno));
+    }
+    remove_machine_sock_context(pSockContext);
+  }
+
+  return result;
+}
+
+/**
+ * Drop every message still waiting in the socket's send queues.
+ *
+ * Each priority queue is emptied under its own lock; the dropped messages are
+ * released through release_out_message() and the per-thread drop counters are
+ * updated. pSockContext->version is incremented once per queue while that
+ * queue's lock is held.
+ * NOTE(review): the version bump presumably lets other code detect that the
+ * queues were reset -- confirm against its readers.
+ *
+ * The log line is now built with snprintf() bounded by the buffer size; the
+ * previous sprintf() could overflow the 256-byte buffer when __FILE__ or the
+ * hostname is long.
+ *
+ * @param pSockContext socket context whose queues are cleared
+ * @param warning      true to report dropped messages as a Warning, false to
+ *                     report them as a Debug message
+ */
+static void clear_send_queue(SocketContext * pSockContext, const bool warning)
+{
+  int i;
+  int count;
+  int64_t drop_bytes;
+  OutMessage *msg;
+  MessageQueue *send_queue;
+
+  count = 0;
+  drop_bytes = 0;
+  for (i=0; i<PRIORITY_COUNT; i++) {
+    send_queue = pSockContext->send_queues + i;
+    ink_mutex_acquire(&send_queue->lock);
+    pSockContext->version++;
+    while (send_queue->head != NULL) {
+      msg = send_queue->head;
+      send_queue->head = send_queue->head->next;
+      drop_bytes += MSG_HEADER_LENGTH + msg->header.aligned_data_len;
+      release_out_message(pSockContext, msg);
+      count++;
+    }
+    send_queue->tail = NULL;
+    ink_mutex_release(&send_queue->lock);
+  }
+
+  if (count > 0) {
+    char buff[256];
+    snprintf(buff, sizeof(buff), "file: " __FILE__ ", line: %d, "
+        "release %s:%d message count: %d",
+        __LINE__, pSockContext->machine->hostname,
+        pSockContext->machine->cluster_port, count);
+    if (warning) {
+      Warning("%s", buff);
+    }
+    else {
+      Debug(CLUSTER_DEBUG_TAG, "%s", buff);
+    }
+
+    pSockContext->thread_context->stats.drop_msg_count += count;
+    pSockContext->thread_context->stats.drop_bytes += drop_bytes;
+  }
+}
+
+/**
+ * Close a cluster connection and release everything attached to it.
+ *
+ * Order of operations: detach from the event poll, close the descriptor,
+ * remove the context from the active-socket table and from its machine, drop
+ * the reader buffers, clear the send queues, notify that the connection was
+ * closed, then either call make_connection() (CONNECT_TYPE_CLIENT) or free the
+ * accepted context.
+ *
+ * errno from a failed detach is captured before logging so the logging call
+ * cannot change the value that is returned.
+ * NOTE(review): when detach fails the function returns early and the
+ * descriptor stays open -- confirm that callers handle this case.
+ *
+ * @param pSockContext connection to close
+ * @return 0 on success, otherwise an errno-style code from the failed detach
+ */
+static int close_socket(SocketContext * pSockContext)
+{
+  if (pSockContext->thread_context->ev_poll->detach(pSockContext->sock) < 0) {
+    const int saved_errno = errno;
+    Error("file: " __FILE__ ", line: %d, "
+        "event poll detach fail, errno: %d, error info: %s",
+        __LINE__, saved_errno, strerror(saved_errno));
+    return saved_errno != 0 ? saved_errno : ENOMEM;
+  }
+  close(pSockContext->sock);
+  pSockContext->sock = -1;
+
+  remove_from_active_sockets(pSockContext);
+  machine_remove_connection(pSockContext);
+
+  pSockContext->reader.blocks = NULL;
+  pSockContext->reader.buffer = NULL;
+
+  clear_send_queue(pSockContext, false);
+  notify_connection_closed(pSockContext);
+
+  if (pSockContext->connect_type == CONNECT_TYPE_CLIENT) {
+    make_connection(pSockContext);
+  }
+  else {
+    free_accept_sock_context(pSockContext);
+  }
+
+  return 0;
+}
+
+/**
+ * Queue a ping request on the given connection.
+ *
+ * The session id is filled with this machine's ip, the current time and a
+ * sequence number of 0. The request is sent with PRIORITY_HIGH through
+ * insert_into_send_queue_head.
+ *
+ * @param pSockContext connection to ping
+ * @return the result of cluster_send_msg_internal_ex()
+ */
+inline static int send_ping_message(SocketContext *pSockContext)
+{
+  ClusterSession ping_session;
+
+  // a ping does not belong to a real session, so the sequence is simply 0
+  ping_session.fields.seq = 0;
+  ping_session.fields.timestamp = CURRENT_TIME();
+  ping_session.fields.ip = my_machine_ip;
+
+  const int rc = cluster_send_msg_internal_ex(&ping_session,
+      pSockContext, FUNC_ID_CLUSTER_PING_REQUEST, NULL, 0, PRIORITY_HIGH,
+      insert_into_send_queue_head);
+  return rc;
+}
+
+static int deal_write_event(SocketContext * pSockContext)
+{
+#define BUFF_TYPE_HEADER 'H'
+#define BUFF_TYPE_DATA 'D'
+#define BUFF_TYPE_PADDING 'P'
+
+ MessageQueue *send_queue;
+ struct iovec write_vec[WRITEV_ARRAY_SIZE];
+ struct {
+ int priority;
+ int index; //message index
+ int buff_type; //message data or header
+ } msg_indexes[WRITEV_ARRAY_SIZE];
+
+ struct {
+ OutMessage *send_msgs[WRITEV_ITEM_ONCE];
+ OutMessage *done_msgs[WRITEV_ITEM_ONCE];
+ OutMessage **pDoneMsgs;
+ int msg_count;
+ int done_count;
+ } msgs[PRIORITY_COUNT];
+
+ OutMessage *msg;
+ int write_bytes;
+ int remain_len;
+ int priority;
+ int start;
+ int total_msg_count;
+ int vec_count;
+ int total_bytes;
+ int total_done_count;
+ int result;
+ int i, k;
+ bool fetch_done;
+ bool last_msg_complete;
+
+ msgs[0].msg_count = msgs[1].msg_count = msgs[2].msg_count = 0;
+ total_msg_count = 0;
+ vec_count = 0;
+ total_bytes = 0;
+
+ priority = pSockContext->queue_index;
+ if (pSockContext->queue_index == 0) {
+ start = 1; //only loop 3 times
+ }
+ else {
+ start = 0; //need loop 4 times
+ }
+
+ last_msg_complete = false;
+ fetch_done = false;
+ for (i=start; i<=PRIORITY_COUNT; i++) {
+ send_queue = pSockContext->send_queues + priority;
+ ink_mutex_acquire(&send_queue->lock);
+ msg = send_queue->head;
+ if (pSockContext->queue_index > 0 &&
+ i == pSockContext->queue_index + 1)
+ {
+ if (msg != NULL) {
+ msg = msg->next; //should skip to next for the first already consumed
+ }
+ }
+ while (msg != NULL) {
+ if (msg->bytes_sent < MSG_HEADER_LENGTH) { //should send header
+ write_vec[vec_count].iov_base = ((char *)&msg->header) +
+ msg->bytes_sent;
+ write_vec[vec_count].iov_len = MSG_HEADER_LENGTH -
+ msg->bytes_sent;
+ total_bytes += write_vec[vec_count].iov_len;
+ msg_indexes[vec_count].priority = priority;
+ msg_indexes[vec_count].buff_type = BUFF_TYPE_HEADER;
+ msg_indexes[vec_count].index = msgs[priority].msg_count;
+ vec_count++;
+
+ remain_len = msg->header.aligned_data_len;
+ }
+ else {
+ remain_len = (msg->header.aligned_data_len + MSG_HEADER_LENGTH) -
+ msg->bytes_sent;
+ }
+
+ if (remain_len > 0) {
+ int pad_len;
+ int remain_data_len;
+ pad_len = msg->header.aligned_data_len - msg->header.data_len;
+ remain_data_len = remain_len - pad_len;
+ if (remain_data_len > 0) {
+ if (msg->data_type == DATA_TYPE_OBJECT) {
+ int read_count;
+ int64_t read_bytes;
+
+ read_count = get_iovec(msg->blocks, write_vec + vec_count,
+ WRITEV_ARRAY_SIZE - 1 - vec_count);
+ read_bytes = 0;
+ for (k=0; k<read_count; k++) {
+ read_bytes += write_vec[vec_count].iov_len;
+ msg_indexes[vec_count].priority = priority;
+ msg_indexes[vec_count].buff_type = BUFF_TYPE_DATA;
+ msg_indexes[vec_count].index = msgs[priority].msg_count;
+ vec_count++;
+ }
+ //assert(read_bytes <= remain_data_len);
+
+ total_bytes += read_bytes;
+ last_msg_complete = read_bytes == remain_data_len;
+ }
+ else {
+ write_vec[vec_count].iov_base = msg->mini_buff +
+ (msg->header.data_len - remain_data_len);
+ write_vec[vec_count].iov_len = remain_data_len;
+ total_bytes += write_vec[vec_count].iov_len;
+ msg_indexes[vec_count].priority = priority;
+ msg_indexes[vec_count].buff_type = BUFF_TYPE_DATA;
+ msg_indexes[vec_count].index = msgs[priority].msg_count;
+ vec_count++;
+ last_msg_complete = true;
+ }
+ }
+ else { //no more data
+ last_msg_complete = true;
+ }
+
+ if (pad_len > 0 && last_msg_complete) {
+ write_vec[vec_count].iov_base = pSockContext->padding;
+ write_vec[vec_count].iov_len = (remain_data_len > 0) ?
+ pad_len : remain_len;
+ total_bytes += write_vec[vec_count].iov_len;
+ msg_indexes[vec_count].priority = priority;
+ msg_indexes[vec_count].buff_type = BUFF_TYPE_PADDING;
+ msg_indexes[vec_count].index = msgs[priority].msg_count;
+ vec_count++;
+ }
+ }
+ else {
+ last_msg_complete = true;
+ }
+
+ msgs[priority].send_msgs[msgs[priority].msg_count++] = msg;
+ total_msg_count++;
+
+ /*
+ Debug(CLUSTER_DEBUG_TAG, "file: " __FILE__ ", line: %d, "
+ "%s:%d sending msg, data body: %d, msg send bytes: %d, total_bytes: %d",
+ __LINE__,
+ pSockContext->machine->hostname,
+ pSockContext->machine->cluster_port,
+ msg->header.data_len,
+ msg->bytes_sent, total_bytes);
+ */
+ if (total_msg_count == WRITEV_ITEM_ONCE ||
+ vec_count >= WRITEV_ARRAY_SIZE - 2 ||
+ total_bytes >= WRITE_MAX_COMBINE_BYTES)
+ {
+ fetch_done = true;
+ break;
+ }
+ if (i == 0) { //fetch only one, the head message
+ break;
+ }
+ msg = msg->next;
+ }
+ ink_mutex_release(&send_queue->lock);
+
+ if (fetch_done) {
+ break;
+ }
+
+ if (i == 0) {
+ priority = 0; //next should start from first priority
+ }
+ else {
+ priority++;
+ }
+ }
+
+ /*
+ Debug(CLUSTER_DEBUG_TAG, "==wwwwww==file: " __FILE__ ", line: %d, "
+ "%s:%d total_bytes: %d, vec_count: %d, total_msg_count: %d", __LINE__,
+ pSockContext->machine->hostname,
+ pSockContext->machine->cluster_port,
+ total_bytes, vec_count, total_msg_count);
+ */
+
+ if (vec_count == 0) {
+ return EAGAIN;
+ }
+
+ pSockContext->thread_context->stats.send_retry_count += total_msg_count;
+ pSockContext->thread_context->stats.call_writev_count++;
+ write_bytes = writev(pSockContext->sock, write_vec, vec_count);
+ if (write_bytes == 0) { //connection closed
+ Debug(CLUSTER_DEBUG_TAG, "file: "__FILE__", line: %d, "
+ "write to %s fail, connection closed",
+ __LINE__, pSockContext->machine->hostname);
+ return ECONNRESET;
+ }
+ else if (write_bytes < 0) {
+ if (errno == EAGAIN || errno == EWOULDBLOCK) {
+ return EAGAIN;
+ }
+ else if (errno == EINTR) { //should try again
+ Debug(CLUSTER_DEBUG_TAG, "file: "__FILE__ ", line: %d, "
+ "write to %s fail, errno: %d, error info: %s",
+ __LINE__, pSockContext->machine->hostname,
+ errno, strerror(errno));
+ return 0;
+ }
+ else {
+ result = errno != 0 ? errno : EIO;
+ Error("file: "__FILE__", line: %d, "
+ "write to %s fail, errno: %d, error info: %s",
+ __LINE__, pSockContext->machine->hostname,
+ result, strerror(result));
+ return result;
+ }
+ }
+
+ pSockContext->thread_context->stats.send_bytes += write_bytes;
+ if (write_bytes == total_bytes && fetch_done) { //send done and have more message to send
+ result = 0;
+ }
+ else {
+ result = EAGAIN;
+ }
+
+ if (write_bytes == total_bytes && last_msg_complete) { //all done
+ for (i=0; i<PRIORITY_COUNT; i++) {
+ msgs[i].pDoneMsgs = msgs[i].send_msgs;
+ msgs[i].done_count = msgs[i].msg_count;
+ }
+
+ total_done_count = total_msg_count;
+ pSockContext->queue_index = 0;
+ }
+ else {
+ int vi;
+ int remain_bytes;
+ int done_index;
+
+ for (i=0; i<PRIORITY_COUNT; i++) {
+ msgs[i].pDoneMsgs = msgs[i].done_msgs;
+ msgs[i].done_count = 0;
+ }
+ total_done_count = 0;
+
+ remain_bytes = write_bytes;
+ for (vi=0; vi<vec_count; vi++) {
+ remain_bytes -= write_vec[vi].iov_len;
+ msg = msgs[msg_indexes[vi].priority].send_msgs[msg_indexes[vi].index];
+
+ if (remain_bytes >= 0) {
+ if (msg->data_type == DATA_TYPE_OBJECT &&
+ msg_indexes[vi].buff_type == BUFF_TYPE_DATA)
+ {
+ consume(msg, write_vec[vi].iov_len);
+ }
+ msg->bytes_sent += write_vec[vi].iov_len;
+
+ if (msg->bytes_sent >= MSG_HEADER_LENGTH + msg->header.aligned_data_len) {
+ total_done_count++;
+ done_index = msgs[msg_indexes[vi].priority].done_count++;
+ msgs[msg_indexes[vi].priority].done_msgs[done_index] = msg;
+ }
+ }
+ else {
+ if (msg->data_type == DATA_TYPE_OBJECT &&
+ msg_indexes[vi].buff_type == BUFF_TYPE_DATA)
+ {
+ consume(msg, remain_bytes + write_vec[vi].iov_len);
+ }
+ msg->bytes_sent += remain_bytes + write_vec[vi].iov_len;
+
+ break;
+ }
+ }
+
+ if (vi < vec_count) {
+ pSockContext->queue_index = msg_indexes[vi].priority; //the first not done msg
+ }
+ else {
+ pSockContext->queue_index = msg_indexes[vi - 1].priority; //the first not done msg
+ }
+
+ if (total_done_count == 0) {
+ return result;
+ }
+ }
+ pSockContext->thread_context->stats.send_msg_count += total_done_count;
+
+ for (i=0; i<PRIORITY_COUNT; i++) {
+ if (msgs[i].done_count == 0) {
+ continue;
+ }
+
+ send_queue = pSockContext->send_queues + i;
+ ink_mutex_acquire(&send_queue->lock);
+ send_queue->head = msgs[i].pDoneMsgs[msgs[i].done_count - 1]->next;
+ if (send_queue->head == NULL) {
+ send_queue->tail = NULL;
+ }
+ ink_mutex_release(&send_queue->lock);
+ }
+
+ for (i=0; i<PRIORITY_COUNT; i++) {
+ for (k=0; k<msgs[i].done_count; k++) {
+ msg = msgs[i].pDoneMsgs[k];
+#ifdef MSG_TIME_STAT_FLAG
+ MachineSessions *pMachineSessions;
+ SessionEntry *pSessionEntry;
+ if (get_response_session_internal(&msg->header,
+ &pMachineSessions, &pSessionEntry) == 0)
+ {
+ int session_index = msg->header.session_id.fields.seq %
+ max_session_count_per_machine;
+ SESSION_LOCK(pMachineSessions, session_index);
+
+ if (!(msg->header.session_id.fields.ip == my_machine_ip))
+ { //request by other
+ if (pSessionEntry->server_start_time != 0) {
+ ink_atomic_increment(&pMachineSessions->msg_stat.count, 1);
+ ink_atomic_increment(&pMachineSessions->msg_stat.time_used,
+ CURRENT_NS() - pSessionEntry->server_start_time);
+ pSessionEntry->server_start_time = 0;
+ }
+ }
+
+ if (pSessionEntry->send_start_time != 0) {
+ ink_atomic_increment(&pMachineSessions->msg_send.count, 1);
+ ink_atomic_increment(&pMachineSessions->msg_send.time_used,
+ (CURRENT_NS() - pSessionEntry->send_start_time));
+ pSessionEntry->send_start_time = 0;
+ }
+
+ SESSION_UNLOCK(pMachineSessions, session_index);
+ }
+#endif
+
+
+ /*
+ Debug(CLUSTER_DEBUG_TAG, "file: " __FILE__ ", line: %d, "
+ "%s:%d send msg done, data body: %d, send bytes: %d",
+ __LINE__,
+ pSockContext->machine->hostname,
+ pSockContext->machine->cluster_port,
+ msgs[i].pDoneMsgs[k]->header.data_len,
+ msgs[i].pDoneMsgs[k]->bytes_sent);
+ */
+
+ pSockContext->thread_context->stats.send_delayed_time +=
+ CURRENT_NS() - msg->in_queue_time;
+ release_out_message(pSockContext, msg);
+ }
+ }
+
+ return result;
+}
+
+/**
+ * Dispatch one completely received message.
+ *
+ * Ping requests are answered right away and ping responses are folded into
+ * the ping statistics of the socket; neither kind is passed on. Any other
+ * message is resolved to its session with get_response_session() and is
+ * then either handed to cluster_msg_deal_func (when call_func is set) or
+ * queued on the session with push_in_message().
+ *
+ * @param pHeader       header of the received message
+ * @param pSockContext  socket the message arrived on
+ * @param blocks        body blocks collected for this message
+ * @return 0 on success, the result of cluster_send_msg_internal_ex() for a
+ *         ping request, or the non-zero result of get_response_session()
+ */
+static int deal_message(MsgHeader *pHeader, SocketContext *
+    pSockContext, IOBufferBlock *blocks)
+{
+  MachineSessions *pMachineSessions;
+  SessionEntry *pSessionEntry;
+  void *user_data;
+  bool call_func;
+  int64_t elapsed;
+  int status;
+
+  //internal ping traffic is handled here and never reaches a session
+  if (pHeader->func_id == FUNC_ID_CLUSTER_PING_REQUEST) {
+    elapsed = CURRENT_TIME() - pHeader->session_id.fields.timestamp;
+    if (elapsed > 1) {
+      Warning("cluster recv client %s ping, sock: #%d, time pass: %d s",
+          pSockContext->machine->hostname, pSockContext->sock,
+          (int)elapsed);
+    }
+
+    //the reply goes to the front of the high priority queue
+    return cluster_send_msg_internal_ex(&pHeader->session_id,
+        pSockContext, FUNC_ID_CLUSTER_PING_RESPONSE, NULL, 0,
+        PRIORITY_HIGH, insert_into_send_queue_head);
+  }
+
+  if (pHeader->func_id == FUNC_ID_CLUSTER_PING_RESPONSE) {
+    if (pSockContext->ping_start_time > 0) {
+      elapsed = CURRENT_NS() - pSockContext->ping_start_time;
+      pSockContext->thread_context->stats.ping_success_count++;
+      pSockContext->thread_context->stats.ping_time_used += elapsed;
+      if (elapsed > cluster_ping_latency_threshold) {
+        Warning("cluster server %s, sock: #%d ping response time: %d us > threshold: %d us",
+            pSockContext->machine->hostname, pSockContext->sock,
+            (int)(elapsed / HRTIME_USECOND),
+            (int)(cluster_ping_latency_threshold / HRTIME_USECOND));
+      }
+      pSockContext->ping_start_time = 0;  //no ping outstanding any more
+    }
+    else {
+      //no ping is outstanding on this socket
+      Warning("unexpect cluster server %s ping response, sock: #%d, time used: %d s",
+          pSockContext->machine->hostname, pSockContext->sock,
+          (int)(CURRENT_TIME() - pHeader->session_id.fields.timestamp));
+    }
+
+    if (pSockContext->ping_fail_count > 0) {
+      pSockContext->ping_fail_count = 0;  //any response clears the failure streak
+    }
+
+    return 0;
+  }
+
+  status = get_response_session(pHeader, &pMachineSessions,
+      &pSessionEntry, pSockContext, &call_func, &user_data);
+  if (status != 0) {
+    //a FUNC_ID_CONNECTION_CLOSED_NOTIFY reply to the peer was once sent
+    //here for requests made by other machines; it is currently disabled
+    return status;
+  }
+
+#ifdef MSG_TIME_STAT_FLAG
+  if (pHeader->session_id.fields.ip == my_machine_ip) {  //request by me
+    int session_index = pHeader->session_id.fields.seq %
+      max_session_count_per_machine;
+
+    SESSION_LOCK(pMachineSessions, session_index);
+    if (pSessionEntry->client_start_time != 0) {
+      ink_atomic_increment(&pMachineSessions->msg_stat.count, 1);
+      ink_atomic_increment(&pMachineSessions->msg_stat.time_used,
+          CURRENT_NS() - pSessionEntry->client_start_time);
+      pSessionEntry->client_start_time = 0;
+    }
+    SESSION_UNLOCK(pMachineSessions, session_index);
+  }
+#endif
+
+  if (!call_func) {
+    //no callback requested: park the message on the session
+    push_in_message(pHeader->session_id, pMachineSessions, pSessionEntry,
+        pHeader->func_id, blocks, pHeader->data_len);
+    return 0;
+  }
+
+#ifdef DEBUG
+  int64_t callback_start = CURRENT_NS();
+#endif
+
+  cluster_msg_deal_func(pHeader->session_id, user_data,
+      pHeader->func_id, blocks, pHeader->data_len);
+
+#ifdef DEBUG
+  //remember the slowest callback seen so far
+  int64_t callback_elapsed = CURRENT_NS() - callback_start;
+  if (callback_elapsed > max_callback_time_used) {
+    max_callback_time_used = callback_elapsed;
+  }
+#endif
+
+  return 0;
+}
+
+/**
+ * Wrap the body bytes just received in a new IOBufferBlock over the reader's
+ * current buffer and append it to the reader's block chain.
+ *
+ * The first block of a message starts right behind the message header; every
+ * later block starts at offset 0 of the current buffer.
+ *
+ * @param pReader             reader whose block chain is extended
+ * @param current_body_bytes  number of body bytes the new block covers
+ */
+inline static void append_to_blocks(ReaderManager *pReader,
+    const int current_body_bytes)
+{
+  IOBufferBlock *block;
+  IOBufferBlock *last;
+
+  if (pReader->blocks != NULL) {
+    //continuation block, its data begins at the buffer start
+    block = new_IOBufferBlock(pReader->buffer, current_body_bytes, 0);
+    block->_buf_end = block->_end;
+
+    //walk to the end of the chain and hook the new block in
+    last = pReader->blocks;
+    while (last->next != NULL) {
+      last = last->next;
+    }
+    last->next = block;
+    return;
+  }
+
+  //first block, its data follows the message header inside the buffer
+  pReader->blocks = new_IOBufferBlock(
+      pReader->buffer, current_body_bytes,
+      (pReader->msg_header + MSG_HEADER_LENGTH)
+      - pReader->buffer->_data);
+  pReader->blocks->_buf_end = pReader->blocks->_end;
+}
+
+/**
+ * Read whatever the socket has to offer into the reader buffer and dispatch
+ * every complete message found in it.
+ *
+ * A message body may span several buffers: finished parts are chained as
+ * IOBufferBlocks (see append_to_blocks) while the header stays reachable
+ * through reader.msg_header. Messages with a negative func_id must fit into
+ * a single block.
+ *
+ * The lengths in the header come from the peer, so they are validated before
+ * use: aligned_data_len must not exceed MAX_MSG_LENGTH, and data_len must lie
+ * in [0, aligned_data_len]. Without the second check a negative or
+ * inconsistent length would move msg_header backwards or announce more body
+ * bytes to the handler than were actually collected.
+ *
+ * @param pSockContext  socket to read from
+ * @return 0 when the read filled the buffer or was interrupted (the caller
+ *         calls again at once), EAGAIN when there is nothing more to do for
+ *         now, ECONNRESET when the peer closed the connection, EINVAL/ENOSPC
+ *         for a malformed header, or the errno of a failed read(). The
+ *         caller closes the socket on anything other than 0 and EAGAIN.
+ */
+static int deal_read_event(SocketContext *pSockContext)
+{
+  int result;
+  int read_bytes;
+  MsgHeader *pHeader;
+
+  pSockContext->thread_context->stats.call_read_count++;
+  read_bytes = read(pSockContext->sock, pSockContext->reader.current,
+      pSockContext->reader.buff_end - pSockContext->reader.current);
+  if (read_bytes == 0) {
+    Debug(CLUSTER_DEBUG_TAG, "file: " __FILE__ ", line: %d, "
+        "type: %c, read from %s fail, connection #%d closed", __LINE__,
+        pSockContext->connect_type, pSockContext->machine->hostname,
+        pSockContext->sock);
+    return ECONNRESET;
+  }
+  else if (read_bytes < 0) {
+    if (errno == EAGAIN || errno == EWOULDBLOCK) {
+      return EAGAIN;
+    }
+    else if (errno == EINTR) {  //should try again
+      Debug(CLUSTER_DEBUG_TAG, "file: " __FILE__ ", line: %d, "
+          "read from %s fail, errno: %d, error info: %s",
+          __LINE__, pSockContext->machine->hostname,
+          errno, strerror(errno));
+      return 0;
+    }
+    else {
+      result = errno != 0 ? errno : EIO;
+      Error("file: " __FILE__ ", line: %d, "
+          "read from %s fail, errno: %d, error info: %s",
+          __LINE__, pSockContext->machine->hostname,
+          result, strerror(result));
+      return result;
+    }
+  }
+
+  pSockContext->thread_context->stats.recv_bytes += read_bytes;
+  pSockContext->reader.current += read_bytes;
+
+  //a completely filled buffer means more data may be pending: report 0 so
+  //that the caller reads again, otherwise EAGAIN
+  result = (pSockContext->reader.buff_end - pSockContext->reader.current
+      == 0) ? 0 : EAGAIN;
+
+  //current is the fix buffer
+  while (1) {
+    int msg_bytes;
+    int recv_body_bytes;
+    int current_true_body_bytes;
+    int padding_body_bytes;
+    int padding_len;
+    bool bFirstBlock;
+
+    if (pSockContext->reader.blocks == NULL) {  //first data block
+      msg_bytes = pSockContext->reader.current -
+        pSockContext->reader.msg_header;
+      if (msg_bytes < MSG_HEADER_LENGTH)  //expect whole msg header
+      {
+        //switch buffers early when less than 4KB is left in this one
+        if ((pSockContext->reader.buff_end -
+              pSockContext->reader.current) < 4 * 1024)
+        {
+          if (msg_bytes > 0) {  //remain bytes should be copied
+            MOVE_TO_NEW_BUFFER(pSockContext, msg_bytes);
+          }
+          else {
+            INIT_READER(pSockContext->reader, read_buffer_size);
+          }
+        }
+
+        return result;
+      }
+
+      recv_body_bytes = msg_bytes - MSG_HEADER_LENGTH;
+      bFirstBlock = true;
+    }
+    else {  //other data block, starting from buffer start
+      msg_bytes = pSockContext->reader.current -
+        pSockContext->reader.buffer->_data;
+      recv_body_bytes = pSockContext->reader.recv_body_bytes + msg_bytes;
+      bFirstBlock = false;
+    }
+
+    pHeader = (MsgHeader *)pSockContext->reader.msg_header;
+#ifdef CHECK_MAGIC_NUMBER
+    if (pHeader->magic != MAGIC_NUMBER) {
+      Error("file: "__FILE__", line: %d, "
+          "magic number: %08x != %08x",
+          __LINE__, pHeader->magic, MAGIC_NUMBER);
+      return EINVAL;
+    }
+#endif
+
+    if (pHeader->aligned_data_len > MAX_MSG_LENGTH) {
+      Error("file: "__FILE__", line: %d, "
+          "message length: %d is too large, exceeds: %d",
+          __LINE__, pHeader->aligned_data_len, MAX_MSG_LENGTH);
+      return ENOSPC;
+    }
+
+    //the lengths are peer supplied: refuse negative or inconsistent values
+    //before they are used for pointer arithmetic below
+    if (pHeader->data_len < 0 ||
+        pHeader->aligned_data_len < pHeader->data_len)
+    {
+      Error("file: " __FILE__ ", line: %d, "
+          "invalid message length, data length: %d, aligned length: %d",
+          __LINE__, pHeader->data_len, pHeader->aligned_data_len);
+      return EINVAL;
+    }
+
+#ifdef MSG_TIME_STAT_FLAG
+    if (!(pHeader->session_id.fields.ip == my_machine_ip))
+    {  //request by other
+      MachineSessions *pMachineSessions;
+      SessionEntry *pSessionEntry;
+      if (get_response_session_internal(pHeader,
+            &pMachineSessions, &pSessionEntry) == 0)
+      {
+        int session_index = pHeader->session_id.fields.seq %
+          max_session_count_per_machine;
+        SESSION_LOCK(pMachineSessions, session_index);
+        if (pSessionEntry->server_start_time == 0) {
+          pSessionEntry->server_start_time = CURRENT_NS();
+        }
+        SESSION_UNLOCK(pMachineSessions, session_index);
+      }
+    }
+#endif
+
+    if (recv_body_bytes < pHeader->aligned_data_len) {  //msg not done
+      if (recv_body_bytes + (pSockContext->reader.buff_end -
+            pSockContext->reader.current) >= pHeader->aligned_data_len)
+      {  //remain buffer is enough
+        return result;
+      }
+
+      //body bytes sitting in the current buffer, with and without padding
+      padding_body_bytes = recv_body_bytes - pSockContext->
+        reader.recv_body_bytes;
+      int recv_padding_len = recv_body_bytes - pHeader->data_len;
+      if (recv_padding_len > 0) {  //should remove padding bytes
+        current_true_body_bytes = padding_body_bytes - recv_padding_len;
+      }
+      else {
+        current_true_body_bytes = padding_body_bytes;
+      }
+
+      //must be only one block
+      if (pHeader->func_id < 0) {
+        if (!bFirstBlock) {
+          Error("file: "__FILE__", line: %d, "
+              "func_id: %d, data length: %d too large exceeds %d",
+              __LINE__, pHeader->func_id, pHeader->data_len,
+              (int)(read_buffer_size - MSG_HEADER_LENGTH));
+          return EINVAL;
+        }
+
+        MOVE_TO_NEW_BUFFER(pSockContext, msg_bytes);
+        return result;
+      }
+
+      if (pSockContext->reader.buff_end - pSockContext->reader.current >=
+          4 * 1024)
+      {  //use remain data buffer
+        return result;
+      }
+
+      if (recv_body_bytes % ALIGN_BYTES != 0) {  //must be aligned
+        Debug(CLUSTER_DEBUG_TAG, "file: " __FILE__ ", line: %d, "
+            "recv_body_bytes: %d (%X) should be aligned with %d", __LINE__,
+            recv_body_bytes, recv_body_bytes, ALIGN_BYTES);
+        //keep reading into this buffer, which requires room to be left
+        ink_release_assert(pSockContext->reader.current < pSockContext->reader.buff_end);
+        return result;
+      }
+
+      if (current_true_body_bytes > 0) {  //should alloc new buffer
+        append_to_blocks(&pSockContext->reader, current_true_body_bytes);
+      }
+      pSockContext->reader.recv_body_bytes = recv_body_bytes;
+
+      if (bFirstBlock) {
+        if (current_true_body_bytes > 0) {  //should keep the msg_header
+          ALLOC_READER_BUFFER(pSockContext->reader, read_buffer_size);
+        }
+        else {  //no data yet!
+          MOVE_TO_NEW_BUFFER(pSockContext, msg_bytes);
+        }
+      }
+      else {  //should keep the msg_header
+        ALLOC_READER_BUFFER(pSockContext->reader, read_buffer_size);
+      }
+
+      return result;
+    }
+
+    //the message is complete: work out the part of it in the current buffer
+    if (bFirstBlock) {
+      padding_body_bytes = pHeader->aligned_data_len;
+    }
+    else {
+      padding_body_bytes = pHeader->aligned_data_len -
+        pSockContext->reader.recv_body_bytes;
+    }
+    padding_len = pHeader->aligned_data_len - pHeader->data_len;
+    if (padding_len > 0) {
+      if (padding_body_bytes > padding_len) {
+        current_true_body_bytes = padding_body_bytes - padding_len;
+      }
+      else {
+        current_true_body_bytes = 0;
+      }
+    }
+    else {  //no padding bytes
+      current_true_body_bytes = padding_body_bytes;
+    }
+
+    if (current_true_body_bytes > 0) {
+      append_to_blocks(&pSockContext->reader, current_true_body_bytes);
+    }
+
+    pSockContext->thread_context->stats.recv_msg_count++;
+    //the result of deal_message is not examined here
+    deal_message(pHeader, pSockContext, pSockContext->reader.blocks);
+
+    pSockContext->reader.blocks = NULL;  //free memory pointer
+    if (pSockContext->reader.recv_body_bytes > 0) {
+      pSockContext->reader.recv_body_bytes = 0;
+    }
+
+    //advance to the header of the next message in the current buffer
+    if (bFirstBlock) {
+      pSockContext->reader.msg_header += MSG_HEADER_LENGTH + padding_body_bytes;
+    }
+    else {  //other block, no msg header
+      pSockContext->reader.msg_header = pSockContext->reader.buffer->_data +
+        padding_body_bytes;
+    }
+  }
+
+  return result;
+}
+
+/**
+ * Handle the events reported by the last poll of a worker thread.
+ *
+ * A socket flagged with EVENTIO_ERROR is closed. Every other socket is read
+ * with deal_read_event() for as long as that returns 0, and is closed when
+ * the final result is anything but EAGAIN.
+ *
+ * @param pThreadContext  worker thread whose poller reported the events
+ * @param count           number of reported events
+ */
+inline static void deal_epoll_events(WorkerThreadContext *
+    pThreadContext, const int count)
+{
+  for (int idx = 0; idx < count; idx++) {
+    const int ev_flags = pThreadContext->ev_poll->getEvents(idx);
+    SocketContext *ctx = (SocketContext *)pThreadContext->ev_poll->getData(idx);
+
+    if ((ev_flags & EVENTIO_ERROR) == 0) {
+      int status;
+
+      //drain the socket: 0 means "call me again"
+      do {
+        status = deal_read_event(ctx);
+      } while (status == 0);
+
+      if (status != EAGAIN) {
+        close_socket(ctx);
+      }
+      continue;
+    }
+
+    Debug(CLUSTER_DEBUG_TAG, "file: " __FILE__ ", line: %d, "
+        "connection %s %s:%d closed", __LINE__,
+        pSockContext_direction(ctx),
+        ctx->machine->hostname, ctx->machine->cluster_port);
+    close_socket(ctx);
+  }
+}
+
+/**
+ * Run one write pass over the active sockets of a worker thread.
+ *
+ * For every socket whose next_write_time has been reached this
+ *   - counts a ping failure when an outstanding ping is older than
+ *     cluster_ping_latency_threshold, and gives the socket up once the
+ *     failures exceed cluster_ping_retries;
+ *   - sends a new ping when none is outstanding and next_ping_time is due;
+ *   - calls deal_write_event() for as long as it returns 0.
+ * Sockets that failed are only collected during the pass and are closed
+ * afterwards. At most MAX_SOCK_CONTEXT_COUNT of them are collected per pass;
+ * further failures are not recorded in this pass.
+ *
+ * @param pThreadContext  worker thread whose sockets are serviced
+ */
+inline static void schedule_sock_write(WorkerThreadContext * pThreadContext)
+{
+#define MAX_SOCK_CONTEXT_COUNT 32
+  SocketContext *doomed[MAX_SOCK_CONTEXT_COUNT];
+  SocketContext **cursor;
+  SocketContext **end;
+  SocketContext *ctx;
+  int doomed_count = 0;
+  int status;
+  int idx;
+  const int64_t now = CURRENT_NS();
+
+  end = pThreadContext->active_sockets + pThreadContext->active_sock_count;
+  for (cursor = pThreadContext->active_sockets; cursor < end; cursor++) {
+    ctx = *cursor;
+    if (now < ctx->next_write_time) {
+      continue;
+    }
+
+    if (!(ctx->ping_start_time > 0)) {
+      //no ping outstanding: send one when the interval has elapsed
+      if (cluster_ping_send_interval > 0 && now >= ctx->next_ping_time) {
+        ctx->thread_context->stats.ping_total_count++;
+        ctx->ping_start_time = now;
+        ctx->next_ping_time = now + cluster_ping_send_interval;
+        send_ping_message(ctx);
+      }
+    }
+    else if (now - ctx->ping_start_time > cluster_ping_latency_threshold) {
+      //the outstanding ping took too long
+      ctx->ping_start_time = 0;  //reset start time when done
+      ctx->ping_fail_count++;
+      if (ctx->ping_fail_count > cluster_ping_retries) {
+        if (doomed_count < MAX_SOCK_CONTEXT_COUNT) {
+          Error("ping cluster server %s timeout more than %d times, close socket #%d",
+              ctx->machine->hostname, cluster_ping_retries,
+              ctx->sock);
+          doomed[doomed_count++] = ctx;
+        }
+        continue;
+      }
+
+      Warning("ping cluster server %s timeout, sock: #%d, fail count: %d",
+          ctx->machine->hostname, ctx->sock,
+          ctx->ping_fail_count);
+    }
+
+    //flush the send queues: 0 means "call me again"
+    do {
+      status = deal_write_event(ctx);
+    } while (status == 0);
+
+    if (status == EAGAIN) {
+      ctx->next_write_time = now + send_wait_time;
+    }
+    else if (doomed_count < MAX_SOCK_CONTEXT_COUNT) {  //error
+      doomed[doomed_count++] = ctx;
+    }
+  }
+
+  //close the failed sockets only after the pass over active_sockets is done
+  for (idx = 0; idx < doomed_count; idx++) {
+    close_socket(doomed[idx]);
+  }
+}
+
+/**
+ * Take the wall clock time with gettimeofday(), store it in Thread::cur_time
+ * and return the stored value.
+ *
+ * @return seconds scaled by HRTIME_SECOND plus microseconds scaled by
+ *         HRTIME_USECOND
+ */
+inline static int64_t get_current_time()
+{
+  timeval now;
+
+  gettimeofday(&now, NULL);
+  Thread::cur_time = now.tv_sec * HRTIME_SECOND +
+    now.tv_usec * HRTIME_USECOND;
+
+  return Thread::cur_time;
+}
+/*
+ * GET_MAX_TIME_USED(v): measure the time elapsed since deal_start_time and
+ * raise v to that value when it is larger, then restart the interval by
+ * setting deal_start_time to the time just taken.
+ *
+ * The macro reads and writes the variables deal_start_time, deal_end_time
+ * and time_used of the scope it is expanded in. v is expanded twice and is
+ * not parenthesized, so pass a plain variable.
+ */
+#define GET_MAX_TIME_USED(v) \
+ do { \
+ deal_end_time = get_current_time(); \
+ time_used = deal_end_time - deal_start_time; \
+ if (time_used > v) { \
+ v = time_used; \
+ } \
+ deal_start_time = deal_end_time; \
+ } while (0)
+
+
+/**
+ * Entry point of a cluster worker thread.
+ *
+ * Each turn of the loop runs a write pass (schedule_sock_write), polls for
+ * events, handles the reported events (deal_epoll_events) and then sleeps
+ * for what is left of io_loop_interval microseconds, provided that is at
+ * least MIN_USLEEP_TIME. In DEBUG builds the longest duration of every phase
+ * is recorded with GET_MAX_TIME_USED.
+ *
+ * A poll failure other than EINTR is fatal.
+ *
+ * @param arg  the WorkerThreadContext of this thread, an element of
+ *             cluster_worker_thread_contexts
+ * @return NULL; as written the loop has no exit, so this is not reached
+ */
+static void *work_thread_entrance(void* arg)
+{
+#define MIN_USLEEP_TIME 100
+
+  int result;
+  int count;
+  int remain_time;
+  int64_t loop_start_time;
+  int64_t deal_start_time;
+#ifdef DEBUG
+  int64_t deal_end_time;
+  int64_t time_used;
+#endif
+  WorkerThreadContext *pThreadContext;
+
+  pThreadContext = (WorkerThreadContext *)arg;
+
+#if defined(HAVE_SYS_PRCTL_H) && defined(PR_SET_NAME)
+  //name the thread after its 1-based index in the worker context array;
+  //snprintf keeps the write inside the buffer whatever the index is
+  char name[32];
+  snprintf(name, sizeof(name), "[ET_CLUSTER %d]", (int)(pThreadContext -
+        cluster_worker_thread_contexts) + 1);
+  prctl(PR_SET_NAME, name, 0, 0, 0);
+#endif
+
+  while (1) {
+    loop_start_time = get_current_time();
+#ifdef DEBUG
+    deal_start_time = loop_start_time;
+#endif
+
+    schedule_sock_write(pThreadContext);
+
+#ifdef DEBUG
+    GET_MAX_TIME_USED(max_write_loop_time_used);
+#endif
+
+    //in DEBUG builds GET_MAX_TIME_USED has just restarted deal_start_time
+#ifndef DEBUG
+    deal_start_time = CURRENT_NS();
+#endif
+    pThreadContext->stats.epoll_wait_count++;
+    count = pThreadContext->ev_poll->poll();
+    pThreadContext->stats.epoll_wait_time_used += CURRENT_NS() - deal_start_time;
+#ifdef DEBUG
+    GET_MAX_TIME_USED(max_epoll_time_used);
+#endif
+
+    if (count == 0) {  //timeout
+    }
+    else if (count < 0) {
+      if (errno != EINTR) {
+        ink_fatal(1, "file: "__FILE__", line: %d, "
+            "call event poll fail, "
+            "errno: %d, error info: %s\n",
+            __LINE__, errno, strerror(errno));
+      }
+    }
+    else {
+      deal_epoll_events(pThreadContext, count);
+
+#ifdef DEBUG
+      GET_MAX_TIME_USED(max_read_loop_time_used);
+#endif
+    }
+
+    //pace the loop: sleep for the unused part of io_loop_interval (us)
+    if (io_loop_interval > MIN_USLEEP_TIME) {
+      remain_time = io_loop_interval - (int)((CURRENT_NS() -
+            loop_start_time) / HRTIME_USECOND);
+      if (remain_time >= MIN_USLEEP_TIME && remain_time <= io_loop_interval) {
+        pThreadContext->stats.loop_usleep_count++;
+        pThreadContext->stats.loop_usleep_time += remain_time;
+        usleep(remain_time);
+
+#ifdef DEBUG
+        GET_MAX_TIME_USED(max_usleep_time_used);
+#endif
+      }
+    }
+  }
+
+  //NOTE(review): unreachable while the loop above has no exit condition;
+  //kept so that the thread count is maintained once one is added
+  if ((result=ink_mutex_acquire(&worker_thread_lock)) != 0)
+  {
+    Error("file: "__FILE__", line: %d, "
+        "call ink_mutex_acquire fail, "
+        "errno: %d, error info: %s",
+        __LINE__, result, strerror(result));
+  }
+  cluster_worker_thread_count--;
+  if ((result=ink_mutex_release(&worker_thread_lock)) != 0)
+  {
+    Error("file: "__FILE__", line: %d, "
+        "call ink_mutex_release fail, "
+        "errno: %d, error info: %s",
+        __LINE__, result, strerror(result));
+  }
+
+  return NULL;
+}
+
+/**
+ * Append a message to the tail of one of the socket's send queues.
+ *
+ * The message is refused when the socket context version differs from
+ * sessionVersion or when the socket descriptor is negative. The checks and
+ * the queue update are done while holding the lock of that queue; the
+ * push/fail statistics of the owning thread are updated after the lock has
+ * been released.
+ *
+ * @param pSockContext    socket whose queue receives the message
+ * @param pMessage        message to enqueue
+ * @param priority        selects the send queue
+ * @param sessionVersion  version the caller expects the socket context to have
+ * @return 0 on success, EINVAL when the message was refused
+ */
+int push_to_send_queue(SocketContext *pSockContext, OutMessage *pMessage,
+    const MessagePriority priority, const uint32_t sessionVersion)
+{
+  int status = 0;
+
+  ink_mutex_acquire(&pSockContext->send_queues[priority].lock);
+
+  if (pSockContext->version != sessionVersion) {
+    Debug(CLUSTER_DEBUG_TAG, "session version: %u != socket context version: %d!",
+        sessionVersion, pSockContext->version);
+    status = EINVAL;
+  }
+  else if (pSockContext->sock < 0) {
+    Debug(CLUSTER_DEBUG_TAG, "sock context is invalid");
+    status = EINVAL;
+  }
+
+  if (status == 0) {
+    //link the message behind the current tail, or start the queue with it
+    if (pSockContext->send_queues[priority].head != NULL) {
+      pSockContext->send_queues[priority].tail->next = pMessage;
+    }
+    else {
+      pSockContext->send_queues[priority].head = pMessage;
+    }
+    pSockContext->send_queues[priority].tail = pMessage;
+    ink_mutex_release(&pSockContext->send_queues[priority].lock);
+
+    ink_atomic_increment(&pSockContext->thread_context->stats.push_msg_count, 1);
+    ink_atomic_increment(&pSockContext->thread_context->stats.push_msg_bytes,
+        MSG_HEADER_LENGTH + pMessage->header.aligned_data_len);
+    return 0;
+  }
+
+  ink_mutex_release(&pSockContext->send_queues[priority].lock);
+
+  ink_atomic_increment(&pSockContext->thread_context->stats.fail_msg_count, 1);
+  ink_atomic_increment(&pSockContext->thread_context->stats.fail_msg_bytes,
+      MSG_HEADER_LENGTH + pMessage->header.aligned_data_len);
+  return status;
+}
+
+/**
+ * Put a message at the front of one of the socket's send queues.
+ *
+ * When the current head has already been sent in part (bytes_sent != 0) the
+ * message goes directly behind it, so that the message in flight is not
+ * interrupted. The queue is updated while holding its lock; the push
+ * statistics of the owning thread are updated afterwards.
+ *
+ * @param pSockContext  socket whose queue receives the message
+ * @param pMessage      message to enqueue
+ * @param priority      selects the send queue
+ * @return always 0
+ */
+int insert_into_send_queue_head(SocketContext *pSockContext, OutMessage *pMessage,
+    const MessagePriority priority)
+{
+  ink_mutex_acquire(&pSockContext->send_queues[priority].lock);
+
+  if (pSockContext->send_queues[priority].head == NULL) {
+    //empty queue: the message is both head and tail
+    pSockContext->send_queues[priority].tail = pMessage;
+    pSockContext->send_queues[priority].head = pMessage;
+  }
+  else if (pSockContext->send_queues[priority].head->bytes_sent != 0) {
+    //head is partly on the wire: slot the message in right behind it
+    pMessage->next = pSockContext->send_queues[priority].head->next;
+    pSockContext->send_queues[priority].head->next = pMessage;
+    if (pMessage->next == NULL) {
+      pSockContext->send_queues[priority].tail = pMessage;
+    }
+  }
+  else {
+    //head message not send yet: the message becomes the new head
+    pMessage->next = pSockContext->send_queues[priority].head;
+    pSockContext->send_queues[priority].head = pMessage;
+  }
+
+  ink_mutex_release(&pSockContext->send_queues[priority].lock);
+
+  ink_atomic_increment(&pSockContext->thread_context->stats.push_msg_count, 1);
+  ink_atomic_increment(&pSockContext->thread_context->stats.push_msg_bytes,
+      MSG_HEADER_LENGTH + pMessage->header.aligned_data_len);
+
+  return 0;
+}
+
http://git-wip-us.apache.org/repos/asf/trafficserver/blob/62504a9f/iocore/cluster/nio.h
----------------------------------------------------------------------
diff --git a/iocore/cluster/nio.h b/iocore/cluster/nio.h
new file mode 100644
index 0000000..94a3fdb
--- /dev/null
+++ b/iocore/cluster/nio.h
@@ -0,0 +1,60 @@
+/** @file
+
+ A brief file description
+
+ @section license License
+
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ */
+
+#ifndef _NIO_H_
+#define _NIO_H_
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "types.h"
+#include "clusterinterface.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+/* Worker thread state. A worker derives its index from the offset of its
+ * own context from cluster_worker_thread_contexts, so that pointer is the
+ * base of the context array. */
+extern WorkerThreadContext *cluster_worker_thread_contexts;
+extern int cluster_worker_thread_count;
+/* cluster_msg_deal_func is called by the read path for a received message
+ * whose session asks for a direct callback; it gets the session id, the
+ * session's user data, the function id, the body blocks and the data
+ * length. NOTE(review): cluster_machine_change_notify is presumably the
+ * machine up/down callback - confirm against its caller. */
+extern message_deal_func cluster_msg_deal_func;
+extern machine_change_notify_func cluster_machine_change_notify;
+
+int nio_init();
+int nio_destroy();
+
+int nio_add_to_epoll(SocketContext *pSockContext);
+int push_to_send_queue(SocketContext *pSockContext, OutMessage *pMessage,
+ const MessagePriority priority, const uint32_t sessionVersion);
+/* The two queue functions around this comment add a message to the send
+ * queue selected by priority, under that queue's lock. push_to_send_queue
+ * (above) appends at the tail and returns EINVAL when the socket context
+ * version differs from sessionVersion or the socket descriptor is negative.
+ * insert_into_send_queue_head (below) inserts at the front, or directly
+ * behind the head when the head is already partly sent, and always
+ * returns 0. */
+int insert_into_send_queue_head(SocketContext *pSockContext, OutMessage *pMessage,
+ const MessagePriority priority);
+
+void log_nio_stats();
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
+
[4/6] refine the codes of cluster
Posted by we...@apache.org.
http://git-wip-us.apache.org/repos/asf/trafficserver/blob/62504a9f/iocore/cluster/ClusterConfig.cc
----------------------------------------------------------------------
diff --git a/iocore/cluster/ClusterConfig.cc b/iocore/cluster/ClusterConfig.cc
index a541364..b969f44 100644
--- a/iocore/cluster/ClusterConfig.cc
+++ b/iocore/cluster/ClusterConfig.cc
@@ -28,6 +28,9 @@
****************************************************************************/
#include "P_Cluster.h"
+#include "machine.h"
+#include "connection.h"
+
// updated from the cluster port configuration variable
int cluster_port = DEFAULT_CLUSTER_PORT_NUMBER;
@@ -155,16 +158,30 @@ ClusterAccept::ClusterAcceptMachine(NetVConnection * NetVC)
}
static void
-make_cluster_connections(MachineList * l)
+make_cluster_connections(MachineList * l, MachineList * old)
{
//
// Connect to all new machines.
//
- uint32_t ip = this_cluster_machine()->ip;
- int num_connections = this_cluster_machine()->num_connections;
+ //uint32_t ip = this_cluster_machine()->ip;
+ //int num_connections = this_cluster_machine()->num_connections;
+ int i;
+ int k;
+ ClusterMachine *m;
if (l) {
- for (int i = 0; i < l->n; i++) {
+ for (i = 0; i < l->n; i++) {
+ struct in_addr in;
+ in.s_addr = l->machine[i].ip;
+ m = add_machine(l->machine[i].ip, l->machine[i].port);
+ if (m != NULL) {
+ machine_make_connections(m);
+ }
+
+ Debug(CL_NOTE, "do connect hostname: %u.%u.%u.%u:%d, %s, cluster_machine_count: %d\n",
+ DOT_SEPARATED(l->machine[i].ip), l->machine[i].port, inet_ntoa(in), cluster_machine_count);
+
+ /*
#ifdef LOCAL_CLUSTER_TEST_MODE
if (ip < l->machine[i].ip || (ip == l->machine[i].ip && (cluster_port < l->machine[i].port))) {
#else
@@ -175,6 +192,48 @@ make_cluster_connections(MachineList * l)
}
}
}
+ */
+ }
+ }
+
+ if (old == NULL) {
+ return;
+ }
+
+ //found down machines
+ if (l == NULL) {
+ for (i = 0; i < old->n; i++) {
+ struct in_addr in;
+ in.s_addr = old->machine[i].ip;
+ Debug(CL_NOTE, "stop connect hostname: %u.%u.%u.%u:%d, %s\n",
+ DOT_SEPARATED(old->machine[i].ip), old->machine[i].port, inet_ntoa(in));
+ m = get_machine(old->machine[i].ip, old->machine[i].port);
+ if (m != NULL) {
+ machine_stop_reconnect(m);
+ }
+ }
+ }
+ else {
+ for (i = 0; i < old->n; i++) {
+ for (k = 0; k < l->n; k++) {
+ if (l->machine[k].ip == old->machine[i].ip &&
+ l->machine[k].port == old->machine[i].port)
+ {
+ break;
+ }
+ }
+
+ if (k == l->n) { //not found, machine down
+ struct in_addr in;
+ in.s_addr = old->machine[i].ip;
+ Debug(CL_NOTE, "stop connect hostname: %u.%u.%u.%u:%d, %s\n",
+ DOT_SEPARATED(old->machine[i].ip), old->machine[i].port, inet_ntoa(in));
+ m = get_machine(old->machine[i].ip, old->machine[i].port);
+ if (m != NULL) {
+ machine_stop_reconnect(m);
+ }
+ }
+ }
}
}
@@ -201,7 +260,7 @@ machine_config_change(const char * /* name ATS_UNUSED */, RecDataT /* data_type
case CLUSTER_CONFIG:
old = cluster_config;
cluster_config = l;
- make_cluster_connections(l);
+ make_cluster_connections(l, old);
break;
}
#else
@@ -209,7 +268,7 @@ machine_config_change(const char * /* name ATS_UNUSED */, RecDataT /* data_type
old = cluster_config;
machines_config = l;
cluster_config = l;
- make_cluster_connections(l);
+ make_cluster_connections(l, old);
#endif
if (old)
free_MachineList(old);
@@ -291,8 +350,10 @@ configuration_add_machine(ClusterConfiguration * c, ClusterMachine * m)
// Build a new cluster configuration with the new machine.
// Machines are stored in ip sorted order.
//
+ /*
EThread *thread = this_ethread();
ProxyMutex *mutex = thread->mutex;
+ */
int i = 0;
ClusterConfiguration *cc = NEW(new ClusterConfiguration(*c));
@@ -319,7 +380,7 @@ configuration_add_machine(ClusterConfiguration * c, ClusterMachine * m)
build_cluster_hash_table(cc);
INK_MEMORY_BARRIER; // commit writes before freeing old hash table
- CLUSTER_INCREMENT_DYN_STAT(CLUSTER_CONFIGURATION_CHANGES_STAT);
+ //CLUSTER_INCREMENT_DYN_STAT(CLUSTER_CONFIGURATION_CHANGES_STAT);
free_configuration(c, cc);
return cc;
@@ -328,9 +389,6 @@ configuration_add_machine(ClusterConfiguration * c, ClusterMachine * m)
ClusterConfiguration *
configuration_remove_machine(ClusterConfiguration * c, ClusterMachine * m)
{
- EThread *thread = this_ethread();
- ProxyMutex *mutex = thread->mutex;
-
//
// Build a new cluster configuration without a machine
//
@@ -350,7 +408,7 @@ configuration_remove_machine(ClusterConfiguration * c, ClusterMachine * m)
build_cluster_hash_table(cc);
INK_MEMORY_BARRIER; // commit writes before freeing old hash table
- CLUSTER_INCREMENT_DYN_STAT(CLUSTER_CONFIGURATION_CHANGES_STAT);
+ //CLUSTER_INCREMENT_DYN_STAT(CLUSTER_CONFIGURATION_CHANGES_STAT);
free_configuration(c, cc);
return cc;
http://git-wip-us.apache.org/repos/asf/trafficserver/blob/62504a9f/iocore/cluster/ClusterMachine.cc
----------------------------------------------------------------------
diff --git a/iocore/cluster/ClusterMachine.cc b/iocore/cluster/ClusterMachine.cc
index 00e99e6..1ac4fae 100644
--- a/iocore/cluster/ClusterMachine.cc
+++ b/iocore/cluster/ClusterMachine.cc
@@ -77,9 +77,9 @@ ClusterMachine::ClusterMachine(char *ahostname, unsigned int aip, int aport)
msg_proto_minor(0),
clusterHandlers(0)
{
- EThread *thread = this_ethread();
- ProxyMutex *mutex = thread->mutex;
- CLUSTER_INCREMENT_DYN_STAT(CLUSTER_MACHINES_ALLOCATED_STAT);
+ //EThread *thread = this_ethread();
+ //ProxyMutex *mutex = thread->mutex;
+ //CLUSTER_INCREMENT_DYN_STAT(CLUSTER_MACHINES_ALLOCATED_STAT);
if (!aip) {
char localhost[1024];
if (!ahostname) {
@@ -166,7 +166,7 @@ ClusterHandler *ClusterMachine::pop_ClusterHandler(int no_rr)
ClusterMachine::~ClusterMachine()
{
ats_free(hostname);
- ats_free(clusterHandlers);
+ // ats_free(clusterHandlers);
}
struct MachineTimeoutContinuation;
@@ -193,10 +193,10 @@ struct MachineTimeoutContinuation: public Continuation
void
free_ClusterMachine(ClusterMachine * m)
{
- EThread *thread = this_ethread();
- ProxyMutex *mutex = thread->mutex;
+ //EThread *thread = this_ethread();
+ //ProxyMutex *mutex = thread->mutex;
// delay before the final free
- CLUSTER_INCREMENT_DYN_STAT(CLUSTER_MACHINES_FREED_STAT);
+ //CLUSTER_INCREMENT_DYN_STAT(CLUSTER_MACHINES_FREED_STAT);
m->dead = true;
eventProcessor.schedule_in(NEW(new MachineTimeoutContinuation(m)), MACHINE_TIMEOUT, ET_CALL);
}
http://git-wip-us.apache.org/repos/asf/trafficserver/blob/62504a9f/iocore/cluster/ClusterProcessor.cc
----------------------------------------------------------------------
diff --git a/iocore/cluster/ClusterProcessor.cc b/iocore/cluster/ClusterProcessor.cc
index b01e0ff..ab6c0a0 100644
--- a/iocore/cluster/ClusterProcessor.cc
+++ b/iocore/cluster/ClusterProcessor.cc
@@ -28,21 +28,34 @@
****************************************************************************/
#include "P_Cluster.h"
+#include "global.h"
+#include "connection.h"
+
/*************************************************************************/
// ClusterProcessor member functions (Public class)
/*************************************************************************/
int cluster_port_number = DEFAULT_CLUSTER_PORT_NUMBER;
int cache_clustering_enabled = 0;
int num_of_cluster_threads = DEFAULT_NUMBER_OF_CLUSTER_THREADS;
+int num_of_cluster_connections = 0;
ClusterProcessor clusterProcessor;
RecRawStatBlock *cluster_rsb = NULL;
int ET_CLUSTER;
+void cluster_main_handler(ClusterSession session, void *context,
+ const int func_id, IOBufferBlock *data, const int data_len);
ClusterProcessor::ClusterProcessor():accept_handler(NULL), this_cluster(NULL)
{
}
+
+//void cluster_error_handler(int event, void *arg);
+//void cluster_main_handler(ClusterSession *session, const int func_id,
+// void *data, const int data_len) {
+// ClusterRPC[func_id](session, data, data_len);
+//}
+
ClusterProcessor::~ClusterProcessor()
{
if (accept_handler) {
@@ -55,98 +68,111 @@ int
ClusterProcessor::internal_invoke_remote(ClusterHandler *ch, int cluster_fn,
void *data, int len, int options, void *cmsg)
{
- EThread *thread = this_ethread();
- ProxyMutex *mutex = thread->mutex;
- //
- // RPC facility for intercluster communication available to other
- // subsystems.
- //
- bool steal = (options & CLUSTER_OPT_STEAL ? true : false);
- bool delay = (options & CLUSTER_OPT_DELAY ? true : false);
- bool data_in_ocntl = (options & CLUSTER_OPT_DATA_IS_OCONTROL ? true : false);
- bool malloced = (cluster_fn == CLUSTER_FUNCTION_MALLOCED);
- OutgoingControl *c;
-
- if (!ch || (!malloced && !((unsigned int) cluster_fn < (uint32_t) SIZE_clusterFunction))) {
- // Invalid message or node is down, free message data
- if (malloced) {
- ats_free(data);
- }
- if (cmsg) {
- invoke_remote_data_args *args = (invoke_remote_data_args *)
- (((OutgoingControl *) cmsg)->data + sizeof(int32_t));
- ink_assert(args->magicno == invoke_remote_data_args::MagicNo);
+// EThread *thread = this_ethread();
+// ProxyMutex *mutex = thread->mutex;
+// //
+// // RPC facility for intercluster communication available to other
+// // subsystems.
+// //
+// bool steal = (options & CLUSTER_OPT_STEAL ? true : false);
+// bool delay = (options & CLUSTER_OPT_DELAY ? true : false);
+// bool data_in_ocntl = (options & CLUSTER_OPT_DATA_IS_OCONTROL ? true : false);
+// bool malloced = (cluster_fn == CLUSTER_FUNCTION_MALLOCED);
+// OutgoingControl *c;
+//
+// if (!ch || (!malloced && !((unsigned int) cluster_fn < (uint32_t) SIZE_clusterFunction))) {
+// // Invalid message or node is down, free message data
+// if (malloced) {
+// ats_free(data);
+// }
+// if (cmsg) {
+// invoke_remote_data_args *args = (invoke_remote_data_args *)
+// (((OutgoingControl *) cmsg)->data + sizeof(int32_t));
+// ink_assert(args->magicno == invoke_remote_data_args::MagicNo);
+//
+// args->data_oc->freeall();
+// ((OutgoingControl *) cmsg)->freeall();
+// }
+// if (data_in_ocntl) {
+// c = *((OutgoingControl **) ((char *) data - sizeof(OutgoingControl *)));
+// c->freeall();
+// }
+// return -1;
+// }
+//
+// if (data_in_ocntl) {
+// c = *((OutgoingControl **) ((char *) data - sizeof(OutgoingControl *)));
+// } else {
+// c = OutgoingControl::alloc();
+// }
+// CLUSTER_INCREMENT_DYN_STAT(CLUSTER_CTRL_MSGS_SENT_STAT);
+// c->submit_time = ink_get_hrtime();
+// if ((c->zero_body = zero_body)) {
+// c->free_proc = &CacheContinuation::disposeOfDataBuffer;
+// c->free_proc_arg = cc;
+// }
+//
+// if (malloced) {
+// c->set_data((char *) data, len);
+// } else {
+// if (!data_in_ocntl) {
+// c->len = len + sizeof(int32_t);
+// c->alloc_data();
+// }
+// if (!c->fast_data()) {
+// CLUSTER_INCREMENT_DYN_STAT(CLUSTER_SLOW_CTRL_MSGS_SENT_STAT);
+// }
+// *(int32_t *) c->data = cluster_fn;
+// if (!data_in_ocntl) {
+// memcpy(c->data + sizeof(int32_t), data, len);
+// }
+// }
+//
+// SET_CONTINUATION_HANDLER(c, (OutgoingCtrlHandler) & OutgoingControl::startEvent);
+//
+// /////////////////////////////////////
+// // Compound message adjustments
+// /////////////////////////////////////
+// if (cmsg) {
+// invoke_remote_data_args *args = (invoke_remote_data_args *)
+// (((OutgoingControl *) cmsg)->data + sizeof(int32_t));
+// ink_assert(args->magicno == invoke_remote_data_args::MagicNo);
+// args->msg_oc = c;
+// c = (OutgoingControl *) cmsg;
+// }
+//#ifndef CLUSTER_THREAD_STEALING
+// delay = true;
+//#endif
+// if (!delay) {
+// EThread *tt = this_ethread();
+// {
+// int q = ClusterFuncToQpri(cluster_fn);
+// ink_atomiclist_push(&ch->outgoing_control_al[q], (void *) c);
+//
+// MUTEX_TRY_LOCK(lock, ch->mutex, tt);
+// if (!lock) {
+// if(ch->thread && ch->thread->signal_hook)
+// ch->thread->signal_hook(ch->thread);
+// return 1;
+// }
+// if (steal)
+// ch->steal_thread(tt);
+// return 1;
+// }
+// } else {
+// c->mutex = ch->mutex;
+// eventProcessor.schedule_imm_signal(c);
+// return 0;
+// }
+
+ (void) ch;
+ (void) cluster_fn;
+ (void) data;
+ (void) len;
+ (void) options;
+ (void) cmsg;
- args->data_oc->freeall();
- ((OutgoingControl *) cmsg)->freeall();
- }
- if (data_in_ocntl) {
- c = *((OutgoingControl **) ((char *) data - sizeof(OutgoingControl *)));
- c->freeall();
- }
- return -1;
- }
-
- if (data_in_ocntl) {
- c = *((OutgoingControl **) ((char *) data - sizeof(OutgoingControl *)));
- } else {
- c = OutgoingControl::alloc();
- }
- CLUSTER_INCREMENT_DYN_STAT(CLUSTER_CTRL_MSGS_SENT_STAT);
- c->submit_time = ink_get_hrtime();
-
- if (malloced) {
- c->set_data((char *) data, len);
- } else {
- if (!data_in_ocntl) {
- c->len = len + sizeof(int32_t);
- c->alloc_data();
- }
- if (!c->fast_data()) {
- CLUSTER_INCREMENT_DYN_STAT(CLUSTER_SLOW_CTRL_MSGS_SENT_STAT);
- }
- *(int32_t *) c->data = cluster_fn;
- if (!data_in_ocntl) {
- memcpy(c->data + sizeof(int32_t), data, len);
- }
- }
-
- SET_CONTINUATION_HANDLER(c, (OutgoingCtrlHandler) & OutgoingControl::startEvent);
-
- /////////////////////////////////////
- // Compound message adjustments
- /////////////////////////////////////
- if (cmsg) {
- invoke_remote_data_args *args = (invoke_remote_data_args *)
- (((OutgoingControl *) cmsg)->data + sizeof(int32_t));
- ink_assert(args->magicno == invoke_remote_data_args::MagicNo);
- args->msg_oc = c;
- c = (OutgoingControl *) cmsg;
- }
-#ifndef CLUSTER_THREAD_STEALING
- delay = true;
-#endif
- if (!delay) {
- EThread *tt = this_ethread();
- {
- int q = ClusterFuncToQpri(cluster_fn);
- ink_atomiclist_push(&ch->outgoing_control_al[q], (void *) c);
-
- MUTEX_TRY_LOCK(lock, ch->mutex, tt);
- if (!lock) {
- if(ch->thread && ch->thread->signal_hook)
- ch->thread->signal_hook(ch->thread);
- return 1;
- }
- if (steal)
- ch->steal_thread(tt);
- return 1;
- }
- } else {
- c->mutex = ch->mutex;
- eventProcessor.schedule_imm_signal(c);
- return 0;
- }
+ return 0;
}
int
@@ -162,45 +188,56 @@ ClusterProcessor::invoke_remote_data(ClusterHandler *ch, int cluster_fn,
int dest_channel, ClusterVCToken * token,
void (*bufdata_free_proc) (void *), void *bufdata_free_proc_arg, int options)
{
- if (!buf) {
- // No buffer data, translate this into a invoke_remote() request
- return internal_invoke_remote(ch, cluster_fn, data, data_len, options, (void *) NULL);
- }
- ink_assert(data);
- ink_assert(data_len);
- ink_assert(dest_channel);
- ink_assert(token);
- ink_assert(bufdata_free_proc);
- ink_assert(bufdata_free_proc_arg);
-
- /////////////////////////////////////////////////////////////////////////
- // Build the compound message as described by invoke_remote_data_args.
- /////////////////////////////////////////////////////////////////////////
-
- // Build OutgoingControl for buffer data
- OutgoingControl *bufdata_oc = OutgoingControl::alloc();
- bufdata_oc->set_data(buf, bufdata_free_proc, bufdata_free_proc_arg);
-
- // Build OutgoingControl for compound message header
- invoke_remote_data_args mh;
- mh.msg_oc = 0;
- mh.data_oc = bufdata_oc;
- mh.dest_channel = dest_channel;
- mh.token = *token;
-
- OutgoingControl *chdr = OutgoingControl::alloc();
- chdr->submit_time = ink_get_hrtime();
- chdr->len = sizeof(int32_t) + sizeof(mh);
- chdr->alloc_data();
- *(int32_t *) chdr->data = -1; // always -1 for compound message
- memcpy(chdr->data + sizeof(int32_t), (char *) &mh, sizeof(mh));
-
- return internal_invoke_remote(ch, cluster_fn, data, data_len, options, (void *) chdr);
+// if (!buf) {
+// // No buffer data, translate this into a invoke_remote() request
+// return internal_invoke_remote(ch, cluster_fn, data, data_len, options, (void *) NULL, zero_body, bufdata_free_proc_arg);
+// }
+// ink_assert(data);
+// ink_assert(data_len);
+// ink_assert(dest_channel);
+// ink_assert(token);
+// ink_assert(bufdata_free_proc);
+// ink_assert(bufdata_free_proc_arg);
+//
+// /////////////////////////////////////////////////////////////////////////
+// // Build the compound message as described by invoke_remote_data_args.
+// /////////////////////////////////////////////////////////////////////////
+//
+// // Build OutgoingControl for buffer data
+// OutgoingControl *bufdata_oc = OutgoingControl::alloc();
+// bufdata_oc->set_data(buf, bufdata_free_proc, bufdata_free_proc_arg);
+//
+// // Build OutgoingControl for compound message header
+// invoke_remote_data_args mh;
+// mh.msg_oc = 0;
+// mh.data_oc = bufdata_oc;
+// mh.dest_channel = dest_channel;
+// mh.token = *token;
+//
+// OutgoingControl *chdr = OutgoingControl::alloc();
+// chdr->submit_time = ink_get_hrtime();
+// chdr->len = sizeof(int32_t) + sizeof(mh);
+// chdr->alloc_data();
+// *(int32_t *) chdr->data = -1; // always -1 for compound message
+// memcpy(chdr->data + sizeof(int32_t), (char *) &mh, sizeof(mh));
+//
+// return internal_invoke_remote(ch, cluster_fn, data, data_len, options, (void *) chdr);
+
+ (void) ch;
+ (void) cluster_fn;
+ (void) data;
+ (void) data_len;
+ (void) buf;
+ (void) dest_channel;
+ (void) token;
+ (void) bufdata_free_proc;
+ (void) bufdata_free_proc_arg;
+ (void) options;
+ return 0;
}
-// TODO: Why pass in the length here if not used ?
void
-ClusterProcessor::free_remote_data(char *p, int /* l ATS_UNUSED */)
+ClusterProcessor::free_remote_data(char *p, int /* l */)
{
char *d = p - sizeof(int32_t); // reset to ptr to function code
int data_hdr = ClusterControl::DATA_HDR;
@@ -225,65 +262,70 @@ ClusterProcessor::free_remote_data(char *p, int /* l ATS_UNUSED */)
}
ClusterVConnection *
-ClusterProcessor::open_local(Continuation * cont, ClusterMachine */* m ATS_UNUSED */, ClusterVCToken & token, int options)
+ClusterProcessor::open_local(Continuation * cont, ClusterMachine * m, ClusterVCToken & token, int options)
{
- //
- // New connect protocol.
- // As a VC initiator, establish the VC connection to the remote node
- // by allocating the VC locally and requiring the caller to pass the
- // token and channel id in the remote request. The remote handler calls
- // connect_local to establish the remote side of the connection.
- //
- bool immediate = ((options & CLUSTER_OPT_IMMEDIATE) ? true : false);
- bool allow_immediate = ((options & CLUSTER_OPT_ALLOW_IMMEDIATE) ? true : false);
-
- ClusterHandler *ch = ((CacheContinuation *)cont)->ch;
- if (!ch)
- return NULL;
- EThread *t = ch->thread;
- if (!t)
- return NULL;
-
- EThread *thread = this_ethread();
- ProxyMutex *mutex = thread->mutex;
- ClusterVConnection *vc = clusterVCAllocator.alloc();
- vc->new_connect_read = (options & CLUSTER_OPT_CONN_READ ? 1 : 0);
- vc->start_time = ink_get_hrtime();
- vc->last_activity_time = vc->start_time;
- vc->ch = ch;
- vc->token.alloc();
- vc->token.ch_id = ch->id;
- token = vc->token;
-#ifdef CLUSTER_THREAD_STEALING
- CLUSTER_INCREMENT_DYN_STAT(CLUSTER_CONNECTIONS_OPENNED_STAT);
- CLUSTER_INCREMENT_DYN_STAT(CLUSTER_CONNECTIONS_OPEN_STAT);
- MUTEX_TRY_LOCK(lock, ch->mutex, thread);
- if (!lock) {
-#endif
- if (immediate) {
- clusterVCAllocator_free(vc);
- return NULL;
- }
- vc->action_ = cont;
- ink_atomiclist_push(&ch->external_incoming_open_local, (void *) vc);
- if(ch->thread && ch->thread->signal_hook)
- ch->thread->signal_hook(ch->thread);
- return CLUSTER_DELAYED_OPEN;
-
-#ifdef CLUSTER_THREAD_STEALING
- } else {
- if (!(immediate || allow_immediate))
- vc->action_ = cont;
- if (vc->start(thread) < 0) {
- return NULL;
- }
- if (immediate || allow_immediate) {
- return vc;
- } else {
- return CLUSTER_DELAYED_OPEN;
- }
- }
-#endif
+// //
+// // New connect protocol.
+// // As a VC initiator, establish the VC connection to the remote node
+// // by allocating the VC locally and requiring the caller to pass the
+// // token and channel id in the remote request. The remote handler calls
+// // connect_local to establish the remote side of the connection.
+// //
+// bool immediate = ((options & CLUSTER_OPT_IMMEDIATE) ? true : false);
+// bool allow_immediate = ((options & CLUSTER_OPT_ALLOW_IMMEDIATE) ? true : false);
+//
+// ClusterHandler *ch = ((CacheContinuation *)cont)->ch;
+// if (!ch)
+// return NULL;
+// EThread *t = ch->thread;
+// if (!t)
+// return NULL;
+//
+// EThread *thread = this_ethread();
+// ProxyMutex *mutex = thread->mutex;
+// ClusterVConnection *vc = clusterVCAllocator.alloc();
+// vc->new_connect_read = (options & CLUSTER_OPT_CONN_READ ? 1 : 0);
+// vc->start_time = ink_get_hrtime();
+// vc->last_activity_time = vc->start_time;
+// vc->ch = ch;
+// vc->token.alloc();
+// vc->token.ch_id = ch->id;
+// token = vc->token;
+//#ifdef CLUSTER_THREAD_STEALING
+// CLUSTER_INCREMENT_DYN_STAT(CLUSTER_CONNECTIONS_OPENNED_STAT);
+// CLUSTER_INCREMENT_DYN_STAT(CLUSTER_CONNECTIONS_OPEN_STAT);
+// MUTEX_TRY_LOCK(lock, ch->mutex, thread);
+// if (!lock) {
+//#endif
+// if (immediate) {
+// clusterVCAllocator_free(vc);
+// return NULL;
+// }
+// vc->action_ = cont;
+// ink_atomiclist_push(&ch->external_incoming_open_local, (void *) vc);
+// if(ch->thread && ch->thread->signal_hook)
+// ch->thread->signal_hook(ch->thread);
+// return CLUSTER_DELAYED_OPEN;
+//
+//#ifdef CLUSTER_THREAD_STEALING
+// } else {
+// if (!(immediate || allow_immediate))
+// vc->action_ = cont;
+// if (vc->start(thread) < 0) {
+// return NULL;
+// }
+// if (immediate || allow_immediate) {
+// return vc;
+// } else {
+// return CLUSTER_DELAYED_OPEN;
+// }
+// }
+//#endif
+ (void) cont;
+ (void) m;
+ (void) token;
+ (void) options;
+ return NULL;
}
ClusterVConnection *
@@ -367,9 +409,10 @@ bool ClusterProcessor::disable_remote_cluster_ops(ClusterMachine * m)
// Simplify debug access to stats
////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////
-
+/*
GlobalClusterPeriodicEvent *
PeriodicClusterEvent;
+*/
#ifdef CLUSTER_TOMCAT
extern int cache_clustering_enabled;
@@ -386,6 +429,77 @@ unsigned long cluster_packet_tos = 0;
int RPC_only_CacheCluster = 0;
#endif
+// Callback invoked when a peer machine changes state (m->dead tells which way).
+// Publishes a new ClusterConfiguration with the machine removed (down) or
+// added (up) on this_cluster()->configurations.
+//
+// Returns 0 when a new configuration was pushed, ENOENT when a dead machine
+// was not part of the current configuration, and EEXIST when a live machine
+// was already part of it.
+//
+// NOTE: the CLUSTER_NODES_STAT counter update and the REC_SignalManager()
+// machine up/down notification are disabled in this version.
+static int machine_change_notify(ClusterMachine * m)
+{
+  Debug("cluster_io", "start notify, machine %s %hhu.%hhu.%hhu.%hhu:%d, version: %d.%d",
+      m->dead ? "down" : "up",
+      DOT_SEPARATED(m->ip), m->cluster_port, m->msg_proto_major,
+      m->msg_proto_minor);
+
+  // Both directions start from the same lookup in the current configuration.
+  ClusterConfiguration *current = this_cluster()->current_configuration();
+  const bool known = current->find(m->ip, m->cluster_port) ? true : false;
+  int status = 0;
+
+  if (m->dead) {
+    if (known) {
+      ClusterConfiguration *updated = configuration_remove_machine(current, m);
+      this_cluster()->configurations.push(updated);
+    } else {
+      status = ENOENT;
+    }
+
+    Note("machine down %hhu.%hhu.%hhu.%hhu:%d, version=%d.%d",
+        DOT_SEPARATED(m->ip), m->cluster_port, m->msg_proto_major,
+        m->msg_proto_minor);
+    return status;
+  }
+
+  if (known) {
+    Warning("machine %hhu.%hhu.%hhu.%hhu:%d already up",
+        DOT_SEPARATED(m->ip), m->cluster_port);
+    status = EEXIST;
+  } else {
+    ClusterConfiguration *updated = configuration_add_machine(current, m);
+    this_cluster()->configurations.push(updated);
+  }
+
+  Note("machine up %hhu.%hhu.%hhu.%hhu:%d, version=%d.%d",
+      DOT_SEPARATED(m->ip), m->cluster_port, m->msg_proto_major,
+      m->msg_proto_minor);
+  return status;
+}
+
+
+// Config-update callback for the two cluster ping settings. The record value
+// is multiplied by HRTIME_MSECOND before being stored in the matching global.
+// Any other record name is ignored. Always returns 0.
+static int
+cluster_ping_config_cb(const char *name, RecDataT /* data_type */, RecData data, void * /* cookie */)
+{
+  if (!strcmp(name, "proxy.config.cluster.ping_send_interval_msecs")) {
+    cluster_ping_send_interval = data.rec_int * HRTIME_MSECOND;
+    return 0;
+  }
+
+  if (!strcmp(name, "proxy.config.cluster.ping_latency_threshold_msecs")) {
+    cluster_ping_latency_threshold = data.rec_int * HRTIME_MSECOND;
+  }
+  return 0;
+}
+
int
ClusterProcessor::init()
{
@@ -686,6 +800,11 @@ ClusterProcessor::init()
if (num_of_cluster_threads == DEFAULT_NUMBER_OF_CLUSTER_THREADS)
REC_ReadConfigInteger(num_of_cluster_threads, "proxy.config.cluster.threads");
+ REC_ReadConfigInteger(num_of_cluster_connections, "proxy.config.cluster.connections");
+ if (num_of_cluster_connections == 0) {
+ num_of_cluster_connections = num_of_cluster_threads;
+ }
+
REC_EstablishStaticConfigInt32(CacheClusterMonitorEnabled, "proxy.config.cluster.enable_monitor");
REC_EstablishStaticConfigInt32(CacheClusterMonitorIntervalSecs, "proxy.config.cluster.monitor_interval_secs");
REC_ReadConfigInteger(cluster_receive_buffer_size, "proxy.config.cluster.receive_buffer_size");
@@ -695,17 +814,28 @@ ClusterProcessor::init()
REC_ReadConfigInteger(cluster_packet_tos, "proxy.config.cluster.sock_packet_tos");
REC_EstablishStaticConfigInt32(RPC_only_CacheCluster, "proxy.config.cluster.rpc_cache_cluster");
+ REC_EstablishStaticConfigInteger(cluster_flow_ctrl_min_bps, "proxy.config.cluster.flow_ctrl.min_bps");
+ REC_EstablishStaticConfigInteger(cluster_flow_ctrl_max_bps, "proxy.config.cluster.flow_ctrl.max_bps");
+ REC_EstablishStaticConfigInt32(cluster_send_min_wait_time, "proxy.config.cluster.flow_ctrl.min_send_wait_time");
+ REC_EstablishStaticConfigInt32(cluster_send_max_wait_time, "proxy.config.cluster.flow_ctrl.max_send_wait_time");
+ REC_EstablishStaticConfigInt32(cluster_min_loop_interval, "proxy.config.cluster.flow_ctrl.min_loop_interval");
+ REC_EstablishStaticConfigInt32(cluster_max_loop_interval, "proxy.config.cluster.flow_ctrl.max_loop_interval");
+
int cluster_type = 0;
REC_ReadConfigInteger(cluster_type, "proxy.local.cluster.type");
create_this_cluster_machine();
+
+ /*
#ifdef NON_MODULAR
// Cluster API Initializations
clusterAPI_init();
#endif
+
// Start global Cluster periodic event
PeriodicClusterEvent = NEW(new GlobalClusterPeriodicEvent);
PeriodicClusterEvent->init();
+ */
this_cluster = NEW(new Cluster);
ClusterConfiguration *cc = NEW(new ClusterConfiguration);
@@ -713,19 +843,61 @@ ClusterProcessor::init()
cc->n_machines = 1;
cc->machines[0] = this_cluster_machine();
memset(cc->hash_table, 0, CLUSTER_HASH_TABLE_SIZE);
- // 0 dummy output data
+ /*
+ // 0 dummy output data
memset(channel_dummy_output, 0, sizeof(channel_dummy_output));
+ */
+
+ int result;
if (cluster_type == 1) {
- cache_clustering_enabled = 1;
- Note("cache clustering enabled");
- compute_cluster_mode();
+ REC_ReadConfigInteger(cluster_ping_send_interval, "proxy.config.cluster.ping_send_interval_msecs");
+ REC_ReadConfigInteger(cluster_ping_latency_threshold, "proxy.config.cluster.ping_latency_threshold_msecs");
+ cluster_ping_send_interval *= HRTIME_MSECOND;
+ cluster_ping_latency_threshold *= HRTIME_MSECOND;
+
+ REC_RegisterConfigUpdateFunc("proxy.config.cluster.ping_send_interval_msecs", cluster_ping_config_cb, NULL);
+ REC_RegisterConfigUpdateFunc("proxy.config.cluster.ping_latency_threshold_msecs", cluster_ping_config_cb, NULL);
+ REC_EstablishStaticConfigInt32(cluster_ping_retries, "proxy.config.cluster.ping_retries");
+
+ REC_ReadConfigInteger(max_session_count_per_machine, "proxy.config.cluster.max_sessions_per_machine");
+ REC_ReadConfigInteger(session_lock_count_per_machine, "proxy.config.cluster.session_locks_per_machine");
+
+ bool found;
+ IpEndpoint cluster_ip; // ip addr of the cluster interface
+ char *intrName; // Name of the interface we are to use
+ intrName = REC_readString("proxy.config.cluster.ethernet_interface", &found);
+ ink_assert(found && intrName != NULL);
+
+ found = mgmt_getAddrForIntr(intrName, &cluster_ip.sa);
+ if (!found) {
+ ink_fatal(1, "[ClusterProcessor::init] Unable to find network interface %s. Exiting...\n", intrName);
+ } else if (!ats_is_ip4(&cluster_ip)) {
+ ink_fatal(1, "[ClusterProcessor::init] Unable to find IPv4 network interface %s. Exiting...\n", intrName);
+ }
+
+ if (num_of_cluster_connections % 2 != 0) {
+ num_of_cluster_connections++;
+ }
+ cluster_global_init(cluster_main_handler, machine_change_notify);
+
+ result = connection_manager_init(cluster_ip.sin.sin_addr.s_addr);
+ if (result == 0) {
+ cache_clustering_enabled = 1;
+ Note("cache clustering enabled");
+ compute_cluster_mode();
+ }
+ else {
+ cache_clustering_enabled = 0;
+ Note("init fail, cache clustering disabled");
+ }
} else {
cache_clustering_enabled = 0;
Note("cache clustering disabled");
+ result = 0;
}
- return 0;
+ return result;
}
// function added to adhere to the name calling convention of init functions
@@ -742,13 +914,18 @@ ClusterProcessor::start()
this_cluster_machine()->cluster_port = cluster_port;
#endif
if (cache_clustering_enabled && (cacheProcessor.IsCacheEnabled() == CACHE_INITIALIZED)) {
- size_t stacksize;
- REC_ReadConfigInteger(stacksize, "proxy.config.thread.default.stacksize");
- ET_CLUSTER = eventProcessor.spawn_event_threads(num_of_cluster_threads, "ET_CLUSTER", stacksize);
+ /*
+ ET_CLUSTER = eventProcessor.spawn_event_threads(num_of_cluster_threads, "ET_CLUSTER");
for (int i = 0; i < eventProcessor.n_threads_for_type[ET_CLUSTER]; i++) {
- initialize_thread_for_net(eventProcessor.eventthread[ET_CLUSTER][i]);
+ initialize_thread_for_net(eventProcessor.eventthread[ET_CLUSTER][i], i);
+#ifndef STANDALONE_IOCORE
+ extern void initialize_thread_for_http_sessions(EThread *thread, int thread_index);
+ initialize_thread_for_http_sessions(eventProcessor.eventthread[ET_CLUSTER][i], i);
+#endif
}
+ */
+
REC_RegisterConfigUpdateFunc("proxy.config.cluster.cluster_configuration", machine_config_change, (void *) CLUSTER_CONFIG);
do_machine_config_change((void *) CLUSTER_CONFIG, "proxy.config.cluster.cluster_configuration");
// TODO: Remove this?
@@ -757,9 +934,12 @@ ClusterProcessor::start()
do_machine_config_change((void *) MACHINE_CONFIG, "proxy.config.cluster.machine_configuration");
#endif
- accept_handler = NEW(new ClusterAccept(&cluster_port, cluster_receive_buffer_size, cluster_send_buffer_size));
- accept_handler->Init();
+ //accept_handler = NEW(new ClusterAccept(&cluster_port, cluster_receive_buffer_size, cluster_send_buffer_size));
+ //accept_handler->Init();
+
+ connection_manager_start();
}
+
return 0;
}
@@ -846,4 +1026,53 @@ ClusterProcessor::compute_cluster_mode()
}
}
+
+// Entry point for messages arriving on a cluster session (registered through
+// cluster_global_init() in ClusterProcessor::init()).
+//
+//   session  - session the message belongs to
+//   context  - opaque pointer associated with the message; its concrete type
+//              depends on the event (see the casts below)
+//   func_id  - message/function id; the sign is dropped before dispatching
+//   data     - payload block chain (may be NULL)
+//   data_len - payload length in bytes
+void cluster_main_handler(ClusterSession session, void *context,
+    const int func_id, IOBufferBlock *data, const int data_len)
+{
+  const int event = (func_id < 0) ? -func_id : func_id;
+
+  // Abort / re-enable carry no payload: hand the event directly to the
+  // CacheContinuation on its own thread.
+  if (event == CLUSTER_CACHE_DATA_ABORT || event == CLUSTER_CACHE_DATA_READ_REENABLE) {
+    ink_assert(data_len == 0 && context && data == NULL);
+    CacheContinuation *cache_cont = (CacheContinuation *) context;
+    cache_cont->thread->schedule_imm(cache_cont, event);
+    return;
+  }
+
+  // Everything else is wrapped in a ClusterCont that carries the message.
+  ClusterCont *msg = clusterContAllocator.alloc();
+  SET_CONTINUATION_HANDLER(msg, &ClusterCont::handleEvent);
+  msg->session = session;
+  msg->context = context;
+  msg->func_id = event;
+  msg->data = data;
+  msg->data_len = data_len;
+  msg->_action = (Continuation *) context;
+  if (msg->_action.continuation) {
+    // Run under the target continuation's mutex when there is one.
+    msg->mutex = msg->_action.mutex;
+  }
+#ifdef DEBUG
+  // The advertised length must match what the block chain actually holds.
+  int64_t nbytes = 0;
+  for (IOBufferBlock *b = data; b; b = b->next) {
+    nbytes += b->read_avail();
+  }
+  ink_assert(data_len == nbytes);
+#endif
+
+  const bool for_cache_vc = event == CLUSTER_CACHE_DATA_READ_DONE
+    || event == CLUSTER_CACHE_DATA_ERROR
+    || event == CLUSTER_CACHE_OP_RESULT_CLUSTER_FUNCTION;
+
+  if (for_cache_vc) {
+    // These events are delivered on the thread recorded in the ClusterCacheVC.
+    ink_assert(context);
+    ClusterCacheVC *cvc = (ClusterCacheVC *) context;
+    cvc->initial_thread->schedule_imm(msg);
+  } else {
+    eventProcessor.schedule_imm(msg);
+  }
+}
+
+
// End of ClusterProcessor.cc
http://git-wip-us.apache.org/repos/asf/trafficserver/blob/62504a9f/iocore/cluster/ClusterVConnection.cc
----------------------------------------------------------------------
diff --git a/iocore/cluster/ClusterVConnection.cc b/iocore/cluster/ClusterVConnection.cc
index 0630edf..a76c912 100644
--- a/iocore/cluster/ClusterVConnection.cc
+++ b/iocore/cluster/ClusterVConnection.cc
@@ -30,6 +30,17 @@
#include "P_Cluster.h"
ClassAllocator<ClusterVConnection> clusterVCAllocator("clusterVCAllocator");
ClassAllocator<ByteBankDescriptor> byteBankAllocator("byteBankAllocator");
+// Allocator for ClusterCacheVC objects. The name string mirrors the variable
+// name, as it does for the allocators declared just above.
+ClassAllocator<ClusterCacheVC> clusterCacheVCAllocator("clusterCacheVCAllocator");
+
+int ClusterCacheVC::size_to_init = -1;
+
+#define CLUSTER_WRITE_MIN_SIZE (1 << 16)
+
+// Close the cluster session `cs` and record that in `session_closed`. Both
+// names must be in scope where the macro is expanded (presumably as
+// ClusterCacheVC members -- confirm). Wrapped in do { } while (0) so that
+// "CLUSTER_CACHE_VC_CLOSE_SESSION;" is exactly one statement and stays correct
+// inside an unbraced if/else.
+#define CLUSTER_CACHE_VC_CLOSE_SESSION \
+  do { \
+    cluster_close_session(cs); \
+    session_closed = true; \
+  } while (0)
ByteBankDescriptor *
ByteBankDescriptor::ByteBankDescriptor_alloc(IOBufferBlock * iob)
@@ -271,13 +282,13 @@ ClusterVConnection::do_io_write(Continuation * c, int64_t nbytes, IOBufferReader
void
ClusterVConnection::do_io_close(int alerrno)
{
- if ((type == VC_CLUSTER) && current_cont) {
- if (((CacheContinuation *)current_cont)->read_cluster_vc == this)
- type = VC_CLUSTER_READ;
- else if (((CacheContinuation *)current_cont)->write_cluster_vc == this)
- type = VC_CLUSTER_WRITE;
- }
- ch->vcs_push(this, type);
+// if ((type == VC_CLUSTER) && current_cont) {
+// if (((CacheContinuation *)current_cont)->read_cluster_vc == this)
+// type = VC_CLUSTER_READ;
+// else if (((CacheContinuation *)current_cont)->write_cluster_vc == this)
+// type = VC_CLUSTER_WRITE;
+// }
+// ch->vcs_push(this, type);
ClusterVConnectionBase::do_io_close(alerrno);
}
@@ -650,4 +661,527 @@ ClusterVConnection::get_disk_io_priority()
return disk_io_priority;
}
+
+// Zero-fills the object from the `vio` member through the end of the object
+// and records that byte count in the static size_to_init.
+ClusterCacheVC::ClusterCacheVC() {
+  // Measure the offset of `vio` on the live object rather than through a
+  // null ClusterCacheVC pointer, which is undefined behaviour.
+  size_to_init = sizeof(ClusterCacheVC) - (size_t) ((char *) &vio - (char *) this);
+  memset((char *) &vio, 0, size_to_init);
+}
+
+// Ask the remote node for (more) data. openReadReadDone is pushed as the
+// handler first. The very first request of a VIO (nothing received yet)
+// carries the byte count and start offset; later requests only re-enable the
+// remote reader. A zero return from cluster_send_message() is treated as
+// success; otherwise the session is closed and the user gets VC_EVENT_ERROR.
+int
+ClusterCacheVC::handleRead(int, void *)
+{
+  ink_assert(!in_progress && !remote_closed);
+  PUSH_HANDLER(&ClusterCacheVC::openReadReadDone);
+
+  bool send_failed;
+  if (vio.nbytes > 0 && total_len == 0) {
+    SetIOReadMessage request;
+    request.nbytes = vio.nbytes;
+    request.offset = seek_to;
+    send_failed = cluster_send_message(cs, -CLUSTER_CACHE_DATA_READ_BEGIN, (char *) &request,
+        sizeof(request), PRIORITY_HIGH) != 0;
+  } else {
+    send_failed = cluster_send_message(cs, -CLUSTER_CACHE_DATA_READ_REENABLE, NULL, 0,
+        PRIORITY_HIGH) != 0;
+  }
+
+  if (send_failed) {
+    CLUSTER_CACHE_VC_CLOSE_SESSION;
+    return calluser(VC_EVENT_ERROR);
+  }
+
+  // Request is on the wire: wait for the response event.
+  in_progress = true;
+  cluster_set_events(cs, RESPONSE_EVENT_NOTIFY_DEALER);
+  return EVENT_CONT;
+}
+
+// Handler that handleRead() pushes while a read request is outstanding.
+// Translates the cluster response into state on this VC (pending data,
+// remote_closed) and then re-dispatches through handleEvent(), or frees the
+// VC if the user has already closed it.
+int
+ClusterCacheVC::openReadReadDone(int event, void *data)
+{
+ cancel_trigger();
+ ink_assert(in_progress);
+ // EVENT_IMMEDIATE is not a response from the cluster: keep waiting.
+ if (event == EVENT_IMMEDIATE)
+ return EVENT_CONT;
+
+ in_progress = false;
+ // Presumably restores the handler that was active before handleRead()
+ // pushed this one -- confirm against the POP_HANDLER definition.
+ POP_HANDLER;
+
+ switch (event) {
+ case CLUSTER_CACHE_DATA_ERROR:
+ {
+ ClusterCont *cc = (ClusterCont *) data;
+ ink_assert(cc && cc->data_len > 0);
+ remote_closed = true;
+ // The payload starts with an int that replaces the event being delivered.
+ event = *(int *) cc->data->start();
+ break;
+ }
+ case CLUSTER_CACHE_DATA_READ_DONE:
+ {
+ ClusterCont *cc = (ClusterCont *) data;
+ // Previously received data must have been consumed already.
+ ink_assert(cc && d_len == 0);
+
+ d_len = cc->data_len;
+ total_len += d_len;
+ blocks = cc->data;
+ // Everything requested has arrived: no further reads are expected.
+ if (total_len >= vio.nbytes)
+ remote_closed = true;
+ break;
+ }
+ case CLUSTER_INTERNEL_ERROR:
+ default:
+ // Any other event is reported to the user as a generic error.
+ event = VC_EVENT_ERROR;
+ remote_closed = true;
+ break;
+ }
+
+ if (closed) {
+ // The user closed the VC while the request was in flight: send an abort
+ // to the remote side unless it is already marked closed, then free.
+ if (!remote_closed)
+ cluster_send_message(cs, CLUSTER_CACHE_DATA_ABORT, NULL, 0, PRIORITY_HIGH);
+
+ free_ClusterCacheVC(this);
+ return EVENT_DONE;
+ }
+ // received data from cluster
+
+ return handleEvent(event, data);
+}
+
+// Handles the response to a remote open-read request.
+//   - action cancelled:        abort the remote side (if still open) and free
+//   - CACHE_EVENT_OPEN_READ:   switch to openReadMain and notify the caller
+//   - CACHE_EVENT_OPEN_WRITE:  the remote side did the pre_write; turn this VC
+//                              into a writer and report OPEN_READ_FAILED with
+//                              this VC as the data
+//   - anything else:           close the session, report OPEN_READ_FAILED and free
+int
+ClusterCacheVC::openReadStart(int event, void *data)
+{
+  ink_assert(in_progress);
+  in_progress = false;
+
+  if (_action.cancelled) {
+    if (!remote_closed)
+      cluster_send_message(cs, CLUSTER_CACHE_DATA_ABORT, NULL, 0, PRIORITY_HIGH);
+    free_ClusterCacheVC(this);
+    return EVENT_DONE;
+  }
+
+  if (event == CACHE_EVENT_OPEN_READ) {
+    SET_HANDLER(&ClusterCacheVC::openReadMain);
+    callcont(CACHE_EVENT_OPEN_READ);
+    return EVENT_CONT;
+  }
+
+  if (event == CACHE_EVENT_OPEN_WRITE) {
+    // the remote side do the pre_write
+    vio.op = VIO::WRITE;
+    SET_HANDLER(&ClusterCacheVC::openWriteMain);
+    _action.continuation->handleEvent(CACHE_EVENT_OPEN_READ_FAILED, this);
+    return EVENT_DONE;
+  }
+
+  // Open failed: prevent further trigger, then tear everything down.
+  remote_closed = true;
+  CLUSTER_CACHE_VC_CLOSE_SESSION;
+  _action.continuation->handleEvent(CACHE_EVENT_OPEN_READ_FAILED, data);
+  free_ClusterCacheVC(this);
+  return EVENT_DONE;
+}
+// Main read-side handler: hands data received from the remote node
+// (blocks / d_len) to the user's VIO and requests more via handleRead()
+// when the user still wants bytes.
+int
+ClusterCacheVC::openReadMain(int event, void *e)
+{
+ cancel_trigger();
+ ink_assert(!in_progress);
+ // Errors and EOS end the read: mark the remote side closed, close the
+ // session and forward the event to the user.
+ if (event == VC_EVENT_ERROR || event == VC_EVENT_EOS) {
+ remote_closed = true;
+ CLUSTER_CACHE_VC_CLOSE_SESSION;
+ return calluser(event);
+ }
+
+ int64_t bytes = d_len;
+ int64_t ntodo = vio.ntodo();
+ // Nothing left to deliver for this VIO.
+ if (ntodo <= 0)
+ return EVENT_CONT;
+ // Back off while the user's buffer holds more than its water mark, except
+ // before any byte has been delivered (vio.ndone == 0).
+ if (vio.buffer.writer()->max_read_avail() > vio.buffer.writer()->water_mark && vio.ndone) // initiate read of first block
+ return EVENT_CONT;
+ // No received data pending: go ask the remote side for some.
+ if (!blocks && vio.ntodo() > 0)
+ goto Lread;
+
+ // Account for at most the number of bytes the VIO still wants.
+ if (bytes > vio.ntodo())
+ bytes = vio.ntodo();
+ vio.buffer.writer()->append_block(blocks);
+ vio.ndone += bytes;
+ blocks = NULL;
+ d_len -= bytes;
+
+ if (vio.ntodo() <= 0)
+ return calluser(VC_EVENT_READ_COMPLETE);
+ else {
+ if (calluser(VC_EVENT_READ_READY) == EVENT_DONE)
+ return EVENT_DONE;
+ // we have to keep reading until we give the user all the
+ // bytes it wanted or we hit the watermark.
+ if (vio.ntodo() > 0 && !vio.buffer.writer()->high_water())
+ goto Lread;
+ return EVENT_CONT;
+ }
+Lread:
+ if (vio.ndone >= (int64_t) doc_len) {
+ // reached the end of the document and the user still wants more
+ return calluser(VC_EVENT_EOS);
+ }
+ // if the state machine calls reenable on the callback from the cache,
+ // we set up a schedule_imm event. The openReadReadDone discards
+ // EVENT_IMMEDIATE events. So, we have to cancel that trigger and set
+ // a new EVENT_INTERVAL event.
+ cancel_trigger();
+ return handleRead(event, e);
+}
+
+// Handles the response to a remote open-write request.
+//   - action cancelled:        abort the remote side (if still open) and free
+//   - CACHE_EVENT_OPEN_WRITE:  switch to openWriteMain and notify the caller
+//   - anything else:           close the session, report OPEN_WRITE_FAILED and free
+int
+ClusterCacheVC::openWriteStart(int event, void *data)
+{
+  ink_assert(in_progress);
+  in_progress = false;
+
+  if (_action.cancelled) {
+    if (!remote_closed)
+      cluster_send_message(cs, CLUSTER_CACHE_DATA_ABORT, NULL, 0, PRIORITY_HIGH);
+    free_ClusterCacheVC(this);
+    return EVENT_DONE;
+  }
+
+  if (event == CACHE_EVENT_OPEN_WRITE) {
+    SET_HANDLER(&ClusterCacheVC::openWriteMain);
+    return callcont(CACHE_EVENT_OPEN_WRITE);
+  }
+
+  // Open failed: prevent further trigger, then tear everything down.
+  remote_closed = true;
+  CLUSTER_CACHE_VC_CLOSE_SESSION;
+  _action.continuation->handleEvent(CACHE_EVENT_OPEN_WRITE_FAILED, data);
+  free_ClusterCacheVC(this);
+  return EVENT_DONE;
+}
+// Main write-side handler: consumes what the user has made available on the
+// VIO and forwards it to the remote node as CLUSTER_CACHE_DATA_WRITE_DONE
+// messages.
+//
+// `length` counts bytes already consumed from the VIO reader but not yet
+// sent; `blocks`/`offset` mark where that unsent data starts. Data goes out
+// in chunks of cache_config_target_fragment_size; a smaller remainder is sent
+// once it reaches CLUSTER_WRITE_MIN_SIZE or when the VIO has nothing left to
+// do. A non-zero result from cluster_send_message() is stored in
+// remote_closed and reported to the user as VC_EVENT_ERROR.
+int
+ClusterCacheVC::openWriteMain(int , void *)
+{
+  cancel_trigger();
+  ink_assert(!in_progress);
+
+Lagain:
+  if (remote_closed) {
+    if (calluser(VC_EVENT_ERROR) == EVENT_DONE)
+      return EVENT_DONE;
+    return EVENT_CONT;
+  }
+
+  // No buffer attached yet: give the user a chance to supply one.
+  if (!vio.buffer.writer()) {
+    if (calluser(VC_EVENT_WRITE_READY) == EVENT_DONE)
+      return EVENT_DONE;
+    if (!vio.buffer.writer())
+      return EVENT_CONT;
+  }
+
+  int64_t ntodo = vio.ntodo();
+
+  if (ntodo <= 0) {
+    if (calluser(VC_EVENT_WRITE_COMPLETE) == EVENT_DONE)
+      return EVENT_DONE;
+    ink_assert(!"close expected after write COMPLETE");
+    if (vio.ntodo() <= 0)
+      return EVENT_CONT;
+  }
+
+  // Work out how much can be taken now: what is newly readable plus what is
+  // already pending, capped at what the VIO still owes.
+  ntodo = vio.ntodo() + length;
+  int64_t avail = vio.buffer.reader()->read_avail();
+  int64_t towrite = avail + length;
+  if (towrite > ntodo) {
+    avail -= (towrite - ntodo);
+    towrite = ntodo;
+  }
+
+  // Remember where the unsent data begins before consuming from the reader.
+  if (!blocks && towrite) {
+    blocks = vio.buffer.reader()->block;
+    offset = vio.buffer.reader()->start_offset;
+  }
+
+  if (avail > 0) {
+    vio.buffer.reader()->consume(avail);
+    vio.ndone += avail;
+    total_len += avail;
+  }
+
+  ink_assert(towrite >= 0);
+  length = towrite;
+
+  int flen = cache_config_target_fragment_size;
+
+  // Send as many full fragments as are pending.
+  while (length >= flen) {
+    IOBufferBlock *r = clone_IOBufferBlockList(blocks, offset, flen);
+    blocks = iobufferblock_skip(blocks, &offset, &length, flen);
+
+    remote_closed = cluster_send_message(cs, CLUSTER_CACHE_DATA_WRITE_DONE, r, -1,
+        priority);
+    if (remote_closed)
+      goto Lagain;
+
+    data_sent += flen;
+    Debug("data_sent", "sent bytes %d, reminds %" PRId64, flen, length);
+  }
+  // so that read_from_writer works better,
+  // especially with a slow origin
+  flen = CLUSTER_WRITE_MIN_SIZE;
+  if (length >= flen || (vio.ntodo() <= 0 && length > 0)) {
+    // iobufferblock_skip() updates `length`, so keep the size of this send
+    // for the accounting and for the log line below.
+    const int64_t tail_len = length;
+    data_sent += tail_len;
+    IOBufferBlock *r = clone_IOBufferBlockList(blocks, offset, tail_len);
+    blocks = iobufferblock_skip(blocks, &offset, &length, tail_len);
+    remote_closed = cluster_send_message(cs, CLUSTER_CACHE_DATA_WRITE_DONE, r,
+        -1, priority);
+    if (remote_closed)
+      goto Lagain;
+    Debug("data_sent", "sent bytes %" PRId64 ", reminds %" PRId64, tail_len, length);
+  }
+
+  if (vio.ntodo() <= 0) {
+    ink_assert(length == 0 && total_len == vio.nbytes);
+    return calluser(VC_EVENT_WRITE_COMPLETE);
+  }
+  return calluser(VC_EVENT_WRITE_READY);
+}
+
+/**
+ * Completion handler for a remove operation: marks the operation as no
+ * longer in progress and the remote side as closed, closes the session via
+ * CLUSTER_CACHE_VC_CLOSE_SESSION, forwards the event to the owner unless
+ * the action was cancelled, and frees this VC.
+ *
+ * @param event event code handed on to the owning continuation
+ * @param data  event payload handed on unchanged
+ * @return always EVENT_DONE
+ */
+int
+ClusterCacheVC::removeEvent(int event, void *data)
+{
+  ink_assert(in_progress);
+
+  remote_closed = true;
+  in_progress = false;
+  CLUSTER_CACHE_VC_CLOSE_SESSION;
+
+  if (!_action.cancelled) {
+    _action.continuation->handleEvent(event, data);
+  }
+
+  free_ClusterCacheVC(this);
+  return EVENT_DONE;
+}
+
+/**
+ * Set up the read VIO from offset 0 and, if no trigger is pending and we
+ * are not inside a user callback, schedule this VC on the caller's thread.
+ *
+ * @param c      continuation that receives the read events
+ * @param nbytes number of bytes the caller wants
+ * @param abuf   buffer the data is written into
+ * @return pointer to this VC's VIO
+ */
+VIO *
+ClusterCacheVC::do_io_read(Continuation *c, int64_t nbytes, MIOBuffer *abuf)
+{
+  ink_assert(vio.op == VIO::READ && alternate.valid());
+
+  vio.buffer.writer_for(abuf);
+  vio.set_continuation(c);
+  vio.nbytes = nbytes;
+  vio.ndone = 0;
+  vio.vc_server = this;
+  seek_to = 0;
+
+  ink_assert(c->mutex->thread_holding);
+  ink_assert(!in_progress);
+
+  if (!trigger && !recursive) {
+    trigger = c->mutex->thread_holding->schedule_imm_local(this);
+  }
+  return &vio;
+}
+
+/**
+ * Positioned variant of do_io_read(): identical set-up, except that the
+ * requested start position is recorded in 'seek_to'.
+ *
+ * @param c      continuation that receives the read events
+ * @param nbytes number of bytes the caller wants
+ * @param abuf   buffer the data is written into
+ * @param offset position stored in seek_to
+ * @return pointer to this VC's VIO
+ */
+VIO *
+ClusterCacheVC::do_io_pread(Continuation *c, int64_t nbytes, MIOBuffer *abuf, int64_t offset)
+{
+  ink_assert(vio.op == VIO::READ && alternate.valid());
+
+  vio.buffer.writer_for(abuf);
+  vio.set_continuation(c);
+  vio.nbytes = nbytes;
+  vio.ndone = 0;
+  vio.vc_server = this;
+  seek_to = offset;
+
+  ink_assert(c->mutex->thread_holding);
+  ink_assert(!in_progress);
+
+  if (!trigger && !recursive) {
+    trigger = c->mutex->thread_holding->schedule_imm_local(this);
+  }
+  return &vio;
+}
+
+/**
+ * Set up the write VIO and announce the write to the peer with a
+ * CLUSTER_CACHE_DATA_WRITE_BEGIN message.
+ *
+ * The message is a SetIOWriteMessage; when 'alternate' is valid its
+ * marshalled form follows the message in the same buffer.  The result of
+ * the send is stored in remote_closed.
+ *
+ * @param c      continuation that receives the write events
+ * @param nbytes number of bytes the caller intends to write
+ * @param abuf   reader the data is taken from
+ * @param owner  must be false (asserted)
+ * @return pointer to this VC's VIO
+ */
+VIO *
+ClusterCacheVC::do_io_write(Continuation *c, int64_t nbytes, IOBufferReader *abuf, bool owner)
+{
+  ink_assert(vio.op == VIO::WRITE);
+  ink_assert(!owner && !in_progress);
+
+  vio.buffer.reader_for(abuf);
+  vio.set_continuation(c);
+  vio.ndone = 0;
+  vio.nbytes = nbytes;
+  doc_len = nbytes;   // may differ from the real body length
+  vio.vc_server = this;
+  ink_assert(c->mutex->thread_holding);
+
+  // writes below 1 MB get the higher of the two priorities
+  priority = (nbytes < (1 << 20)) ? PRIORITY_MID : PRIORITY_LOW;
+
+  SetIOWriteMessage msg;
+  msg.nbytes = nbytes;
+  const int hdr_len = alternate.valid() ? alternate.marshal_length() : 0;
+  msg.hdr_len = hdr_len;
+
+  ink_assert(total_len == 0);
+  ink_assert((frag_type == CACHE_FRAG_TYPE_HTTP && hdr_len > 0) ||
+             (frag_type != CACHE_FRAG_TYPE_HTTP && hdr_len == 0));
+
+  if (hdr_len > 0) {
+    // message followed by the marshalled header, sent as one block
+    Ptr<IOBufferData> payload;
+    payload = new_IOBufferData(iobuffer_size_to_index(sizeof msg + hdr_len, MAX_BUFFER_SIZE_INDEX));
+    memcpy((char *) payload->data(), &msg, sizeof(msg));
+
+    char *hdr_dst = (char *) payload->data() + sizeof msg;
+    int marshalled = alternate.marshal(hdr_dst, hdr_len);
+    ink_assert(marshalled >= 0);
+
+    IOBufferBlock *blk = new_IOBufferBlock(payload, sizeof msg + hdr_len, 0);
+    blk->_buf_end = blk->_end;
+    remote_closed = cluster_send_message(cs, CLUSTER_CACHE_DATA_WRITE_BEGIN, blk, -1, priority);
+  } else {
+    remote_closed = cluster_send_message(cs, -CLUSTER_CACHE_DATA_WRITE_BEGIN, &msg, sizeof msg, priority);
+  }
+
+  if (!trigger && !recursive) {
+    trigger = c->mutex->thread_holding->schedule_imm_local(this);
+  }
+  return &vio;
+}
+
+/**
+ * Close the VC.
+ *
+ * alerrno == -1 (the default argument) is a normal close (closed = 1),
+ * anything else an abort (closed = -1); force_empty always turns the close
+ * into a normal one.  While the peer is still reachable:
+ *  - a normal close of a write either sends a header-only update / close
+ *    message, or flushes the data still pending and, when doc_len differs
+ *    from vio.nbytes, sends CLUSTER_CACHE_DATA_CLOSE;
+ *  - an aborted write, or a read that is not in progress, sends
+ *    CLUSTER_CACHE_DATA_ABORT.
+ * The VC is freed at the end unless it was already closed, a user callback
+ * is on the stack, or an operation is in progress.
+ *
+ * @param alerrno -1 for a normal close, any other value for an abort
+ */
+void
+ClusterCacheVC::do_io_close(int alerrno)
+{
+  ink_assert(mutex->thread_holding == this_ethread());
+  int previous_closed = closed;
+  if (alerrno == -1) {
+    closed = 1;
+  } else {
+    closed = -1;
+  }
+  DDebug("cache_close", "do_io_close %p %d %d", this, alerrno, closed);
+
+  // special case: caching a 0-byte document is always a successful close
+  if (f.force_empty) {
+    closed = 1;
+  }
+
+  if (!remote_closed) {
+    if (closed > 0 && vio.op == VIO::WRITE) {
+      if ((f.update && vio.nbytes == 0) || f.force_empty) {
+        // header-only update (or empty document): no body data to flush
+        if (frag_type == CACHE_FRAG_TYPE_HTTP) {
+          if (alternate.valid()) {
+            SetIOCloseMessage close_msg;
+            close_msg.h_len = alternate.marshal_length();
+            close_msg.d_len = 0;
+            close_msg.total_len = 0;
+
+            Ptr<IOBufferData> payload;
+            payload = new_IOBufferData(iobuffer_size_to_index(sizeof close_msg + close_msg.h_len));
+            char *raw = payload->data();
+            memcpy(raw, &close_msg, sizeof close_msg);
+
+            int marshalled = alternate.marshal((char *) raw + sizeof close_msg, close_msg.h_len);
+            ink_assert(marshalled >= 0 && marshalled <= close_msg.h_len);
+
+            IOBufferBlock *blk = new_IOBufferBlock(payload, sizeof close_msg + close_msg.h_len, 0);
+            blk->_buf_end = blk->_end;
+
+            remote_closed = cluster_send_message(cs,
+                -CLUSTER_CACHE_HEADER_ONLY_UPDATE, blk, -1, PRIORITY_HIGH);
+          } else {
+            remote_closed = cluster_send_message(cs, -CLUSTER_CACHE_DATA_CLOSE, &total_len,
+                sizeof total_len, PRIORITY_HIGH);
+          }
+        } else {
+          remote_closed = cluster_send_message(cs, -CLUSTER_CACHE_DATA_CLOSE,
+              &total_len, sizeof total_len, priority);
+        }
+
+        goto Lfree;
+      } else if ((total_len < vio.nbytes) || length > 0) {
+        // data is still outstanding: pull what the reader holds, then flush
+        int64_t remaining = vio.ntodo() + length;
+        int64_t readable = vio.buffer.reader()->read_avail();
+        int64_t take = readable;
+        int64_t pending = take + length;
+        if (pending > remaining) {
+          take -= (pending - remaining);
+          pending = remaining;
+        }
+
+        if (!blocks && pending) {
+          blocks = vio.buffer.reader()->block;
+          offset = vio.buffer.reader()->start_offset;
+        }
+
+        if (take > 0) {
+          vio.buffer.reader()->consume(take);
+          vio.ndone += take;
+          total_len += take;
+        }
+
+        // the user closed "successfully" without supplying everything
+        if (vio.ntodo() > 0) {
+          Warning("writer closed success but still want more data");
+          remote_closed = cluster_send_message(cs, CLUSTER_CACHE_DATA_ABORT, NULL, 0,
+              priority);
+          goto Lfree;
+        }
+
+        length = pending;
+        ink_assert(total_len == vio.nbytes);
+        int frag = cache_config_target_fragment_size;
+        while (length >= frag) {
+          IOBufferBlock *blk = clone_IOBufferBlockList(blocks, offset, frag);
+          blocks = iobufferblock_skip(blocks, &offset, &length, frag);
+
+          remote_closed = cluster_send_message(cs, CLUSTER_CACHE_DATA_WRITE_DONE, blk,
+              -1, priority);
+          if (remote_closed) {
+            goto Lfree;
+          }
+
+          data_sent += frag;
+          Debug("data_sent", "sent bytes %d, reminds %"PRId64"", frag, length);
+        }
+
+        // whatever is left is shorter than one fragment: send it as the tail
+        if (length > 0) {
+          data_sent += length;
+          IOBufferBlock *blk = clone_IOBufferBlockList(blocks, offset, length);
+          blocks = iobufferblock_skip(blocks, &offset, &length, length);
+          remote_closed = cluster_send_message(cs, CLUSTER_CACHE_DATA_WRITE_DONE, blk, -1,
+              priority);
+          if (remote_closed) {
+            goto Lfree;
+          }
+          Debug("data_sent", "sent bytes done: %"PRId64", reminds %"PRId64"", data_sent, length);
+        }
+      }
+
+      if (doc_len != vio.nbytes) {
+        // vio.nbytes no longer matches the length announced in do_io_write
+        ink_assert(total_len == vio.nbytes && length == 0);
+        remote_closed = cluster_send_message(cs, CLUSTER_CACHE_DATA_CLOSE,
+            &total_len, sizeof total_len, priority);
+        goto Lfree;
+      }
+      ink_assert(data_sent == total_len);
+    }
+
+    if (closed < 0 && vio.op == VIO::WRITE) {
+      remote_closed = cluster_send_message(cs, CLUSTER_CACHE_DATA_ABORT, NULL, 0, PRIORITY_HIGH);
+    }
+
+    if (vio.op == VIO::READ && !in_progress) {
+      remote_closed = cluster_send_message(cs, CLUSTER_CACHE_DATA_ABORT, NULL, 0, PRIORITY_HIGH);
+    }
+  }
+Lfree:
+  if (!previous_closed && !recursive && !in_progress) {
+    free_ClusterCacheVC(this);
+  }
+}
+
+/**
+ * Schedule this VC on the thread holding the VIO's mutex, unless a trigger
+ * is already pending or an operation is in progress.
+ *
+ * @param avio VIO whose mutex identifies the thread to schedule on
+ */
+void
+ClusterCacheVC::reenable(VIO *avio)
+{
+  DDebug("cache_reenable", "reenable %p, trigger %p, in_progress %d", this, trigger, in_progress);
+  ink_assert(avio->mutex->thread_holding);
+
+  if (trigger || in_progress) {
+    return;
+  }
+  trigger = avio->mutex->thread_holding->schedule_imm_local(this);
+}
+
+/**
+ * Re-entrant reenable: when nothing is pending or in progress, run the
+ * handler right away if no user callback is on the stack, otherwise
+ * schedule it on the thread holding the VIO's mutex.
+ *
+ * @param avio VIO whose mutex identifies the thread to schedule on
+ */
+void
+ClusterCacheVC::reenable_re(VIO *avio)
+{
+  DDebug("cache_reenable", "reenable %p", this);
+  ink_assert(avio->mutex->thread_holding);
+
+  // a pending trigger or an operation in progress will drive the VC anyway
+  if (trigger || in_progress) {
+    return;
+  }
+
+  if (!recursive) {
+    handleEvent(EVENT_NONE, (void *) 0);
+  } else {
+    trigger = avio->mutex->thread_holding->schedule_imm_local(this);
+  }
+}
// End of ClusterVConnection.cc
http://git-wip-us.apache.org/repos/asf/trafficserver/blob/62504a9f/iocore/cluster/EventPoll.cc
----------------------------------------------------------------------
diff --git a/iocore/cluster/EventPoll.cc b/iocore/cluster/EventPoll.cc
new file mode 100644
index 0000000..8a6b9e6
--- /dev/null
+++ b/iocore/cluster/EventPoll.cc
@@ -0,0 +1,158 @@
+/** @file
+
+ A brief file description
+
+ @section license License
+
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ */
+
+#include "EventPoll.h"
+
+/**
+ * Create the poll descriptor for the configured backend (epoll, kqueue or
+ * event ports) and allocate room for 'size' returned events.
+ *
+ * @param size    maximum number of events fetched by one poll()
+ * @param timeout poll timeout in milliseconds
+ */
+EventPoll::EventPoll(const int size, int timeout) : _size(size)
+{
+#if TS_USE_EPOLL
+  _extra_events = EPOLLET;
+  _timeout = timeout;
+  _poll_fd = epoll_create(_size);
+  const int nbytes = sizeof(struct epoll_event) * size;
+  _events = (struct epoll_event *) ats_malloc(nbytes);
+#elif TS_USE_KQUEUE
+  _extra_events = INK_EV_EDGE_TRIGGER;
+  // split the millisecond timeout into seconds + nanoseconds
+  _timeout.tv_sec = timeout / 1000;
+  _timeout.tv_nsec = 1000000 * (timeout % 1000);
+  _poll_fd = kqueue();
+  const int nbytes = sizeof(struct kevent) * size;
+  _events = (struct kevent *) ats_malloc(nbytes);
+#elif TS_USE_PORT
+  _extra_events = 0;
+  // split the millisecond timeout into seconds + nanoseconds
+  _timeout.tv_sec = timeout / 1000;
+  _timeout.tv_nsec = 1000000 * (timeout % 1000);
+  _poll_fd = port_create();
+  const int nbytes = sizeof(port_event_t) * size;
+  _events = (port_event_t *) ats_malloc(nbytes);
+#endif
+}
+
+/** Release the event array and close the poll descriptor. */
+EventPoll::~EventPoll()
+{
+  ats_free(_events);   // allocated with ats_malloc() in the constructor
+  close(_poll_fd);
+}
+
+/**
+ * Register a descriptor with the poller.
+ *
+ * @param fd   descriptor to watch
+ * @param e    event mask; _extra_events is added for epoll and kqueue
+ * @param data user pointer handed back through getData()
+ * @return result of the underlying epoll_ctl / kevent / port_associate call
+ */
+int EventPoll::attach(const int fd, const int e, void *data)
+{
+#if TS_USE_EPOLL
+  struct epoll_event reg;
+  memset(&reg, 0, sizeof(reg));
+  reg.data.ptr = data;
+  reg.events = e | _extra_events;
+  return epoll_ctl(_poll_fd, EPOLL_CTL_ADD, fd, &reg);
+#elif TS_USE_KQUEUE
+  // one change entry per requested direction
+  struct kevent changes[2];
+  int nchanges = 0;
+  if (e & EVENTIO_READ) {
+    EV_SET(&changes[nchanges++], fd, EVFILT_READ, EV_ADD | _extra_events, 0, 0, data);
+  }
+  if (e & EVENTIO_WRITE) {
+    EV_SET(&changes[nchanges++], fd, EVFILT_WRITE, EV_ADD | _extra_events, 0, 0, data);
+  }
+  return kevent(_poll_fd, changes, nchanges, NULL, 0, NULL);
+#elif TS_USE_PORT
+  return port_associate(_poll_fd, PORT_SOURCE_FD, fd, e, data);
+#endif
+}
+
+/**
+ * Change the event mask of an already registered descriptor.
+ *
+ * With kqueue each direction is either (re-)added or deleted, depending on
+ * whether its bit is set in 'e'.
+ *
+ * @param fd   descriptor to update
+ * @param e    new event mask
+ * @param data user pointer handed back through getData()
+ * @return result of the underlying epoll_ctl / kevent / port_associate call
+ */
+int EventPoll::modify(const int fd, const int e, void *data)
+{
+#if TS_USE_EPOLL
+  struct epoll_event reg;
+  memset(&reg, 0, sizeof(reg));
+  reg.data.ptr = data;
+  reg.events = e | _extra_events;
+  return epoll_ctl(_poll_fd, EPOLL_CTL_MOD, fd, &reg);
+#elif TS_USE_KQUEUE
+  struct kevent changes[2];
+  int nchanges = 0;
+
+  // read filter first, then write filter
+  if (e & EVENTIO_READ) {
+    EV_SET(&changes[nchanges++], fd, EVFILT_READ, EV_ADD | _extra_events, 0, 0, data);
+  } else {
+    EV_SET(&changes[nchanges++], fd, EVFILT_READ, EV_DELETE, 0, 0, data);
+  }
+
+  if (e & EVENTIO_WRITE) {
+    EV_SET(&changes[nchanges++], fd, EVFILT_WRITE, EV_ADD | _extra_events, 0, 0, data);
+  } else {
+    EV_SET(&changes[nchanges++], fd, EVFILT_WRITE, EV_DELETE, 0, 0, data);
+  }
+  return kevent(_poll_fd, changes, nchanges, NULL, 0, NULL);
+#elif TS_USE_PORT
+  return port_associate(_poll_fd, PORT_SOURCE_FD, fd, e, data);
+#endif
+}
+
+/**
+ * Remove a descriptor from the poller.
+ *
+ * @param fd descriptor to remove
+ * @return result of epoll_ctl / port_dissociate; 0 on every other backend,
+ *         where nothing is done
+ */
+int EventPoll::detach(const int fd)
+{
+#if TS_USE_EPOLL
+  return epoll_ctl(_poll_fd, EPOLL_CTL_DEL, fd, NULL);
+#elif TS_USE_PORT
+  return port_dissociate(_poll_fd, PORT_SOURCE_FD, fd);
+#else
+  // no explicit removal is performed on this backend
+  return 0;
+#endif
+}
+
+/**
+ * Wait for events, for at most the timeout given to the constructor.
+ * Results are read back with getEvents() / getData().
+ *
+ * @return number of events stored in the event array, or a negative value
+ *         on error
+ */
+int EventPoll::poll()
+{
+#if TS_USE_EPOLL
+  return epoll_wait(_poll_fd, _events, _size, _timeout);
+#elif TS_USE_KQUEUE
+  return kevent(_poll_fd, NULL, 0, _events, _size, &_timeout);
+#elif TS_USE_PORT
+  int result;            // was assigned and returned without being declared
+  unsigned nget = 1;     // in: minimum events to wait for, out: events fetched
+  if (port_getn(_poll_fd, _events, _size, &nget, &_timeout) == 0) {
+    result = (int) nget;
+  } else {
+    switch (errno) {
+    case EINTR:
+    case EAGAIN:
+    case ETIME:
+      // interrupted or timed out: report whatever was fetched (possibly 0)
+      result = (int) nget;
+      break;
+    default:
+      result = -1;
+      break;
+    }
+  }
+  return result;
+#else
+#error port me
+#endif
+}
+
http://git-wip-us.apache.org/repos/asf/trafficserver/blob/62504a9f/iocore/cluster/EventPoll.h
----------------------------------------------------------------------
diff --git a/iocore/cluster/EventPoll.h b/iocore/cluster/EventPoll.h
new file mode 100644
index 0000000..ccb65de
--- /dev/null
+++ b/iocore/cluster/EventPoll.h
@@ -0,0 +1,105 @@
+/** @file
+
+ A brief file description
+
+ @section license License
+
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ */
+
+#ifndef __EVENT_POLL_H__
+#define __EVENT_POLL_H__
+
+#include "P_Net.h"
+
+/**
+ * Thin wrapper that hides the platform poll facility (epoll, kqueue or
+ * event ports, chosen by the TS_USE_* macros) behind one small interface:
+ * attach / modify / detach descriptors, poll(), then read each result with
+ * getEvents(i) and getData(i).
+ */
+class EventPoll {
+public:
+  EventPoll(const int size, int timeout);
+  ~EventPoll();
+
+  int attach(const int fd, const int e, void *data);
+  int modify(const int fd, const int e, void *data);
+  int detach(const int fd);
+  int poll();
+
+#if TS_USE_KQUEUE
+  /* kqueue reports a filter plus flags rather than an event mask, so the
+   * pair is translated into INK_EVP_* bits here. */
+  inline int kq_event_convert(int16_t event, uint16_t flags)
+  {
+    int mask = 0;
+
+    if (event == EVFILT_READ) {
+      mask |= INK_EVP_IN;
+    } else if (event == EVFILT_WRITE) {
+      mask |= INK_EVP_OUT;
+    }
+
+    if (flags & EV_EOF) {
+      mask |= INK_EVP_HUP;
+    }
+    return mask;
+  }
+#endif
+
+  /** Event mask of the index-th entry filled in by the last poll(). */
+  inline int getEvents(const int index)
+  {
+#if TS_USE_EPOLL
+    return _events[index].events;
+#elif TS_USE_KQUEUE
+    return kq_event_convert(_events[index].filter, _events[index].flags);
+#elif TS_USE_PORT
+    return _events[index].portev_events;
+#else
+#error port me
+#endif
+  }
+
+  /** User pointer registered with the index-th entry of the last poll(). */
+  inline void *getData(const int index)
+  {
+#if TS_USE_EPOLL
+    return _events[index].data.ptr;
+#elif TS_USE_KQUEUE
+    return _events[index].udata;
+#elif TS_USE_PORT
+    return _events[index].portev_user;
+#else
+#error port me
+#endif
+  }
+
+protected:
+  int _size;            // max events (fd) fetched per poll()
+  int _extra_events;    // flags added to every registration
+  int _poll_fd;         // descriptor of the poll facility itself
+
+#if TS_USE_EPOLL
+  struct epoll_event *_events;
+  int _timeout;
+#elif TS_USE_KQUEUE
+  struct kevent *_events;
+  struct timespec _timeout;
+#elif TS_USE_PORT
+  port_event_t *_events;
+  timespec_t _timeout;
+#endif
+};
+
+#endif
+
http://git-wip-us.apache.org/repos/asf/trafficserver/blob/62504a9f/iocore/cluster/Makefile.am
----------------------------------------------------------------------
diff --git a/iocore/cluster/Makefile.am b/iocore/cluster/Makefile.am
index 1d8266d..b49046d 100644
--- a/iocore/cluster/Makefile.am
+++ b/iocore/cluster/Makefile.am
@@ -57,5 +57,12 @@ libinkcluster_a_SOURCES = \
P_ClusterLoadMonitor.h \
P_ClusterMachine.h \
P_TimeTrace.h \
- Inline.cc
+ Inline.cc \
+ global.cc \
+ nio.cc \
+ session.cc \
+ message.cc \
+ connection.cc \
+ machine.cc \
+ EventPoll.cc
http://git-wip-us.apache.org/repos/asf/trafficserver/blob/62504a9f/iocore/cluster/P_Cluster.h
----------------------------------------------------------------------
diff --git a/iocore/cluster/P_Cluster.h b/iocore/cluster/P_Cluster.h
index aa3d6a5..f24d3de 100644
--- a/iocore/cluster/P_Cluster.h
+++ b/iocore/cluster/P_Cluster.h
@@ -123,6 +123,8 @@ enum
cluster_stat_count
};
+#define SIZE_OF_FRAGEMENT ((1 << 20) - 128)
+
extern RecRawStatBlock *cluster_rsb;
#define CLUSTER_INCREMENT_DYN_STAT(x) \
RecIncrRawStat(cluster_rsb, mutex->thread_holding, (int) x, 1);
http://git-wip-us.apache.org/repos/asf/trafficserver/blob/62504a9f/iocore/cluster/P_ClusterCache.h
----------------------------------------------------------------------
diff --git a/iocore/cluster/P_ClusterCache.h b/iocore/cluster/P_ClusterCache.h
index 10fc46a..162fef0 100644
--- a/iocore/cluster/P_ClusterCache.h
+++ b/iocore/cluster/P_ClusterCache.h
@@ -53,6 +53,9 @@
/****************************************************************************/
#include "P_ClusterMachine.h"
+#include "clusterinterface.h"
+
+extern int enable_cache_empty_http_doc;
//
// Cluster Processor
@@ -310,6 +313,7 @@ struct ClusterVCToken
//
typedef void ClusterFunction(ClusterHandler * ch, void *data, int len);
typedef ClusterFunction *ClusterFunctionPtr;
+typedef void ClusterFunctionExt(ClusterSession cs, void *context, void *data);
struct ClusterVConnectionBase;
@@ -512,7 +516,9 @@ struct ClusterVConnection: public ClusterVConnectionBase
ClusterVConnection(int is_new_connect_read = 0);
~ClusterVConnection();
void free(); // Destructor actions (we are using ClassAllocator)
-
+ virtual bool is_read_from_writer() {
+ return false;
+ }
virtual void do_io_close(int lerrno = -1);
virtual VIO *do_io_read(Continuation * c, int64_t nbytes, MIOBuffer * buf);
virtual VIO *do_io_write(Continuation * c, int64_t nbytes, IOBufferReader * buf, bool owner = false);
@@ -739,9 +745,9 @@ extern ClusterFunction close_channel_ClusterFunction;
extern ClusterFunction get_hostinfo_ClusterFunction;
extern ClusterFunction put_hostinfo_ClusterFunction;
extern ClusterFunction cache_lookup_ClusterFunction;
-extern ClusterFunction cache_op_ClusterFunction;
+//extern ClusterFunction cache_op_ClusterFunction;
extern ClusterFunction cache_op_malloc_ClusterFunction;
-extern ClusterFunction cache_op_result_ClusterFunction;
+//extern ClusterFunction cache_op_result_ClusterFunction;
extern ClusterFunction set_channel_data_ClusterFunction;
extern ClusterFunction post_setchan_send_ClusterFunction;
extern ClusterFunction set_channel_pin_ClusterFunction;
@@ -750,6 +756,9 @@ extern ClusterFunction set_channel_priority_ClusterFunction;
extern ClusterFunction post_setchan_priority_ClusterFunction;
extern ClusterFunction default_api_ClusterFunction;
+extern ClusterFunctionExt cache_op_ClusterFunction;
+extern ClusterFunctionExt cache_op_result_ClusterFunction;
+
struct ClusterFunctionDescriptor
{
bool fMalloced; // the function will free the data
@@ -767,7 +776,8 @@ struct ClusterFunctionDescriptor
#ifndef DEFINE_CLUSTER_FUNCTIONS
extern
#endif
-ClusterFunctionDescriptor clusterFunction[]
+ClusterFunctionDescriptor clusterFunction[1]
+#if 0
#ifdef DEFINE_CLUSTER_FUNCTIONS
= {
{false, true, CMSG_LOW_PRI, test_ClusterFunction, 0},
@@ -863,7 +873,7 @@ ClusterFunctionDescriptor clusterFunction[]
// ********** ADD NEW ENTRIES ABOVE THIS LINE ************
}
#endif
-
+#endif
;
extern unsigned SIZE_clusterFunction; // clusterFunction[] entries
@@ -983,10 +993,27 @@ ClusterFuncToQpri(int cluster_func)
#define API_F29_CLUSTER_FUNCTION 79
#define API_F30_CLUSTER_FUNCTION 80
-#define API_STARECT_CLUSTER_FUNCTION API_F01_CLUSTER_FUNCTION
-#define API_END_CLUSTER_FUNCTION API_F30_CLUSTER_FUNCTION
+#define CLUSTER_CACHE_OP_CLUSTER_FUNCTION (CLUSTER_MSG_START+81)
+#define CLUSTER_CACHE_DATA_READ_BEGIN (CLUSTER_MSG_START+82)
+#define CLUSTER_CACHE_DATA_READ_REENABLE (CLUSTER_MSG_START+83)
+#define CLUSTER_CACHE_DATA_WRITE_BEGIN (CLUSTER_MSG_START+84)
+#define CLUSTER_CACHE_HEADER_ONLY_UPDATE (CLUSTER_MSG_START+85)
+#define CLUSTER_CACHE_DATA_CLOSE (CLUSTER_MSG_START+86)
+#define CLUSTER_CACHE_DATA_ABORT (CLUSTER_MSG_START+87)
+#define CLUSTER_CACHE_DATA_WRITE_DONE (CLUSTER_MSG_START+88)
+
+#define CLUSTER_CACHE_OP_RESULT_CLUSTER_FUNCTION (CLUSTER_MSG_START+89)
+#define CLUSTER_CACHE_DATA_READ_DONE (CLUSTER_MSG_START+90)
+#define CLUSTER_CACHE_DATA_ERROR (CLUSTER_MSG_START+91)
-#define UNDEFINED_CLUSTER_FUNCTION 0xFDEFFDEF
+#define CLUSTER_INTERNEL_ERROR (CLUSTER_MSG_START+100)
+#define CLUSTER_PING_CLUSTER_FUNCTION (CLUSTER_MSG_START+101)
+#define CLUSTER_PING_REPLY_CLUSTER_FUNCTION (CLUSTER_MSG_START+102)
+
+#define API_STARECT_CLUSTER_FUNCTION API_F01_CLUSTER_FUNCTION
+#define API_END_CLUSTER_FUNCTION API_F30_CLUSTER_FUNCTION
+
+#define UNDEFINED_CLUSTER_FUNCTION 0xFDEFFDEF
//////////////////////////////////////////////
// Initial cluster connect exchange message
@@ -1171,4 +1198,328 @@ ClusterVC_remove_write(ClusterVConnectionBase * vc)
}
+/**
+ * Cache VConnection whose cache lives on another cluster node: reads,
+ * writes and closes are turned into cluster messages sent over the
+ * ClusterSession 'cs'.
+ *
+ * NOTE: do not reorder the data members.  free_ClusterCacheVC() resets the
+ * object with a memset of 'size_to_init' bytes starting at 'vio'.
+ */
+struct ClusterCacheVC : public CacheVConnection
+{
+  static int size_to_init;
+  Action _action;
+  Ptr<IOBufferData> buf;          // for read
+  Ptr<IOBufferData> first_buf;    // the head fragment
+  Ptr<IOBufferBlock> blocks;      // data available to write
+
+  CacheHTTPInfo alternate;
+
+  VIO vio;
+  ink_hrtime start_time;
+  CacheFragType frag_type;
+  int64_t seek_to;        // pread offset
+  int64_t offset;         // offset into 'blocks' of data to write
+  int64_t length;         // length of data available to write
+  int64_t total_len;      // bytes consumed from the user's reader so far
+  int64_t data_sent;      // bytes handed to cluster_send_message so far
+  int64_t doc_len;        // nbytes given to do_io_write
+
+  int doc_pos;            // read position in 'buf'
+  int d_len;              // the length of data in 'buf'
+
+  int closed;             // 0: open, 1: closed normally, -1: aborted
+  int recursive;          // depth of user callbacks currently on the stack
+  int disk_io_priority;
+  int probe_depth;
+  MessagePriority priority;
+
+  time_t time_pin;
+  EThread *initial_thread;    // initial thread open_XX was called on
+  ClusterSession cs;
+  Event *trigger;             // pending self-scheduled event, if any
+  ContinuationHandler save_handler;
+
+
+  bool in_progress;
+  bool remote_closed;
+  bool session_closed;
+
+  union
+  {
+    uint32_t flags;
+    struct
+    {
+      unsigned int use_first_key:1;
+      unsigned int overwrite:1;         // overwrite first_key Dir if it exists
+      unsigned int close_complete:1;    // WRITE_COMPLETE is final
+      unsigned int sync:1;              // write to be committed to durable storage before WRITE_COMPLETE
+      unsigned int evacuator:1;
+      unsigned int single_fragment:1;
+      unsigned int evac_vector:1;
+      unsigned int lookup:1;
+      unsigned int update:1;
+      unsigned int remove:1;
+      unsigned int remove_aborted_writers:1;
+      unsigned int open_read_timeout:1; // UNUSED
+      unsigned int data_done:1;
+      unsigned int read_from_writer_called:1;
+      unsigned int not_from_ram_cache:1;
+      unsigned int rewrite_resident_alt:1;
+      unsigned int readers:1;
+      unsigned int doc_from_ram_cache:1;
+#ifdef HIT_EVACUATE
+      unsigned int hit_evacuate:1;
+#endif
+#ifdef HTTP_CACHE
+      unsigned int force_empty:1;       // used for cache empty http document
+#endif
+#ifdef SSD_CACHE
+      unsigned int read_from_ssd:1;
+      unsigned int write_into_ssd:1;
+      unsigned int ram_fixup:1;
+      unsigned int transistor:1;
+#endif
+    } f;
+  };
+
+  ClusterCacheVC();
+  VIO *do_io_read(Continuation *c, int64_t nbytes, MIOBuffer *buf);                            // invoke remote
+  VIO *do_io_pread(Continuation *c, int64_t nbytes, MIOBuffer *buf, int64_t offset);           // invoke remote
+  VIO *do_io_write(Continuation *c, int64_t nbytes, IOBufferReader *buf, bool owner = false);  // invoke remote
+  void do_io_close(int lerrno = -1);    // invoke remote ?
+  void reenable(VIO *avio);             // invoke remote ?
+  void reenable_re(VIO *avio);          // invoke remote ?
+
+  void do_remote_close();               // invoke remote, for cancel or error
+
+  // The three accessors below are stubs: they assert and return -1.
+  virtual int get_header(void **, int *)
+  {
+    ink_assert(!"implemented");
+    return -1;
+  }
+  virtual int set_header(void *, int)
+  {
+    ink_assert(!"implemented");
+    return -1;
+  }
+  virtual int get_single_data(void **, int *)
+  {
+    ink_assert(!"implemented");
+    return -1;
+  }
+
+#ifdef HTTP_CACHE
+  // Takes over 'info' (shallow copy, then the source is cleared).  When
+  // enable_cache_empty_http_doc is set and the response carries a zero
+  // Content-Length, force_empty is raised.
+  virtual void set_http_info(CacheHTTPInfo *info)
+  {
+    f.force_empty = 0;
+    if (enable_cache_empty_http_doc) {
+      MIMEField *field = info->m_alt->m_response_hdr.field_find(
+          MIME_FIELD_CONTENT_LENGTH, MIME_LEN_CONTENT_LENGTH);
+      if (field && !field->value_get_int64()) {
+        f.force_empty = 1;
+      }
+    }
+    alternate.copy_shallow(info);
+    info->clear();
+  }
+  virtual void get_http_info(CacheHTTPInfo ** info)
+  {
+    *info = &alternate;
+  }
+#endif
+
+  bool is_ram_cache_hit()
+  {
+    ink_assert(vio.op == VIO::READ);
+    return !f.not_from_ram_cache;
+  }
+  virtual bool set_disk_io_priority(int priority)
+  {
+    disk_io_priority = priority;
+    return true;
+  }
+  virtual int get_disk_io_priority()
+  {
+    return disk_io_priority;
+  }
+  virtual bool set_pin_in_cache(time_t t)
+  {
+    time_pin = t;
+    return true;
+  }
+  virtual time_t get_pin_in_cache()
+  {
+    return time_pin;
+  }
+  virtual int64_t get_object_size()
+  {
+    return alternate.object_size_get();
+  }
+  virtual bool is_read_from_writer()
+  {
+    return f.read_from_writer_called;
+  }
+  virtual bool is_ram_cache_hit() const
+  {
+    return !f.not_from_ram_cache;
+  }
+  virtual bool is_pread_capable()
+  {
+    return true;
+  }
+
+  // Cancel and forget the pending self-scheduled event, if there is one.
+  void
+  cancel_trigger()
+  {
+    if (trigger) {
+      trigger->cancel_action();
+      trigger = NULL;
+    }
+  }
+
+  int calluser(int event);
+  int callcont(int event);
+  int handleRead(int event, void *data);
+  int openReadReadDone(int event, void *data);
+//  int handleWrite(int event, void *data);
+//  int openWriteWriteDone(int event, void *data);
+  int openReadStart(int event, void *data);
+  int openWriteStart(int event, void *data);
+  int openReadMain(int event, void *data);
+  int openWriteMain(int event, void *data);
+  int removeEvent(int event, void *data);
+};
+
+
+
+// Read request message.  NOTE(review): presumably nbytes/offset mirror the
+// do_io_read / do_io_pread arguments -- confirm against the sender.
+struct SetIOReadMessage : public ClusterMessageHeader
+{
+  int64_t nbytes;
+  int64_t offset;
+};
+
+// Write announcement message: the payload size plus the length of the
+// marshalled header that follows the message (0 when there is none).
+struct SetIOWriteMessage : public ClusterMessageHeader
+{
+  int32_t hdr_len;
+  int64_t nbytes;
+};
+
+// Close message: h_len is the length of the marshalled header that follows.
+// NOTE(review): d_len and total_len look like data lengths -- confirm
+// against the receiver.
+struct SetIOCloseMessage : public ClusterMessageHeader
+{
+  int h_len;
+  int d_len;
+  int64_t total_len;
+};
+
+// Reenable message carrying a single integer field.
+struct SetIOReenableMessage : public ClusterMessageHeader
+{
+  int reenable;
+};
+// Response message: adds no fields to the common header.
+struct SetResponseMessage : public ClusterMessageHeader
+{
+};
+
+/**
+ * Clone a window of a block chain.
+ *
+ * Starting 'offset' bytes into the chain headed by 'ab', clones blocks
+ * until 'len' bytes are covered; blocks lying entirely before the offset
+ * are skipped.  The first clone is advanced past the remaining offset and
+ * the last clone is shortened when it overshoots 'len'.
+ *
+ * @param ab     head of the source chain
+ * @param offset bytes to skip from the start of the chain
+ * @param len    bytes to cover
+ * @return head of the cloned chain, or NULL if nothing was cloned
+ */
+inline IOBufferBlock *
+clone_IOBufferBlockList(IOBufferBlock *ab, int64_t offset, int64_t len)
+{
+  IOBufferBlock *first = NULL;
+  IOBufferBlock *last = NULL;
+
+  for (IOBufferBlock *cur = ab; cur && len >= 0; cur = cur->next) {
+    int64_t usable = cur->read_avail() - offset;
+    if (usable <= 0) {
+      // block lies entirely before the window: carry the rest of the offset
+      offset = -usable;
+      continue;
+    }
+
+    if (first == NULL) {
+      first = cur->clone();
+      first->consume(offset);
+      last = first;
+    } else {
+      last->next = cur->clone();
+      last = last->next;
+    }
+
+    len -= usable;
+    offset = 0;
+  }
+
+  // a negative remainder is the overshoot of the last clone: give it back
+  if (last && len < 0) {
+    last->fill(len);
+  }
+  return first;
+}
+
+ClusterCacheVC *new_ClusterCacheVC();
+void free_ClusterCacheVC(ClusterCacheVC *ccvc);
+
+/**
+ * Deliver a VIO event to the user's continuation.  'recursive' is raised
+ * for the duration of the callback; if the VC ended up closed with nothing
+ * in progress, it is freed.
+ *
+ * @param event event code passed to the VIO's continuation
+ * @return EVENT_DONE if the VC was freed, EVENT_CONT otherwise
+ */
+inline int
+ClusterCacheVC::calluser(int event)
+{
+  ++recursive;
+  ink_assert(this_ethread() == vio._cont->mutex->thread_holding);
+  vio._cont->handleEvent(event, (void *) &vio);
+  --recursive;
+
+  if (!closed || in_progress) {
+    return EVENT_CONT;
+  }
+  free_ClusterCacheVC(this);
+  return EVENT_DONE;
+}
+
+/**
+ * Deliver an open-result event to the continuation that started the
+ * operation, passing this VC as the event data.  Afterwards the VC is freed
+ * if it was closed with nothing in progress; otherwise, if the callback set
+ * up a VIO, the handler is run immediately.
+ *
+ * @param event event code passed to the action's continuation
+ * @return always EVENT_DONE
+ */
+inline int
+ClusterCacheVC::callcont(int event)
+{
+  ++recursive;
+  ink_assert(this_ethread() == _action.mutex->thread_holding);
+  _action.continuation->handleEvent(event, this);
+  --recursive;
+
+  if (closed && !in_progress) {
+    free_ClusterCacheVC(this);
+  } else if (vio.vc_server) {
+    handleEvent(EVENT_IMMEDIATE, 0);
+  }
+  return EVENT_DONE;
+}
+
+extern ClassAllocator<ClusterCacheVC> clusterCacheVCAllocator;
+
+/**
+ * Allocate a ClusterCacheVC for 'cont': the VC shares the continuation's
+ * mutex, remembers the thread currently holding it and records the start
+ * time.
+ *
+ * @param cont continuation that owns the new VC
+ * @return the initialised VC
+ */
+inline ClusterCacheVC *
+new_ClusterCacheVC(Continuation *cont)
+{
+  EThread *owner_thread = cont->mutex->thread_holding;
+  ClusterCacheVC *vc = clusterCacheVCAllocator.alloc();
+
+  vc->_action = cont;
+  vc->initial_thread = owner_thread;
+  vc->mutex = cont->mutex;
+  vc->start_time = ink_get_hrtime();
+  ink_assert(vc->trigger == NULL);
+
+  Debug("cluster_cache_new", "new %p", vc);
+  return vc;
+}
+
+/**
+ * Tear down a ClusterCacheVC and return it to its allocator: cancels a
+ * pending trigger, closes the session if that has not happened yet, drops
+ * all buffer and mutex references, and zeroes size_to_init bytes of state
+ * starting at 'vio'.
+ *
+ * @param cont the VC to free; must not have an operation in progress
+ */
+inline void
+free_ClusterCacheVC(ClusterCacheVC *cont)
+{
+  Debug("cluster_cache_free", "free %p", cont);
+  ink_assert(cont->mutex->thread_holding == this_ethread());
+
+  if (cont->trigger) {
+    cont->trigger->cancel();
+  }
+  ink_assert(!cont->in_progress);
+
+  if (!cont->session_closed) {
+    cluster_close_session(cont->cs);
+  }
+
+  cont->vio.buffer.clear();
+  cont->vio.mutex.clear();
+#ifdef HTTP_CACHE
+  // a write-side alternate is destroyed, a read-side one only cleared
+  if (cont->vio.op == VIO::WRITE) {
+    cont->alternate.destroy();
+  } else {
+    cont->alternate.clear();
+  }
+#endif
+  cont->_action.cancelled = 0;
+  cont->_action.mutex.clear();
+  cont->mutex.clear();
+  cont->buf.clear();
+  cont->first_buf.clear();
+  cont->blocks.clear();
+
+  // reset the plain state that starts at 'vio'
+  memset((char *) &cont->vio, 0, cont->size_to_init);
+
+  clusterCacheVCAllocator.free(cont);
+}
#endif /* _Cluster_h */
http://git-wip-us.apache.org/repos/asf/trafficserver/blob/62504a9f/iocore/cluster/P_ClusterCacheInternal.h
----------------------------------------------------------------------
diff --git a/iocore/cluster/P_ClusterCacheInternal.h b/iocore/cluster/P_ClusterCacheInternal.h
index 8b62d44..44b675c 100644
--- a/iocore/cluster/P_ClusterCacheInternal.h
+++ b/iocore/cluster/P_ClusterCacheInternal.h
@@ -81,6 +81,88 @@ extern int ET_CLUSTER;
#define PROBE_LOCAL_CACHE_FIRST DO_REPLICATION
#define PROBE_LOCAL_CACHE_LAST false
+/**
+ * One-shot continuation that carries a received cluster message (function
+ * id, payload chain, session, context) to the code that handles it.
+ * handleEvent() dispatches on func_id and then returns the object to
+ * clusterContAllocator.
+ */
+struct ClusterCont : public Continuation
+{
+  ClusterSession session;
+  Ptr<IOBufferBlock> data;    // payload chain
+  void *context;
+  int func_id;                // selects the handler in handleEvent()
+  int data_len;               // payload bytes not yet consumed
+
+  Action _action;
+
+  int handleEvent(int event, void *d);
+  IOBufferData *copy_data();                  // whole chain into a new buffer
+  int copy_data(char *buf, int size);         // first 'size' bytes into 'buf'
+  void consume(int size);                     // drop the first 'size' bytes
+};
+
+/**
+ * Flatten the payload chain into one freshly allocated IOBufferData whose
+ * size is derived from data_len.
+ *
+ * NOTE(review): every byte of every block is copied, so this relies on
+ * data_len covering the whole chain -- confirm at the call sites.
+ *
+ * @return the new buffer holding a contiguous copy of the chain
+ */
+inline IOBufferData *
+ClusterCont::copy_data()
+{
+  IOBufferData *flat = new_IOBufferData(iobuffer_size_to_index(data_len, MAX_BUFFER_SIZE_INDEX));
+  char *dst = flat->data();
+
+  IOBufferBlock *b = data;
+  while (b) {
+    memcpy(dst, b->_start, b->_end - b->_start);
+    dst += b->_end - b->_start;
+    b = b->next;
+  }
+  return flat;
+}
+
+/**
+ * Drop the first 'size' bytes of the payload: blocks that are used up
+ * completely are released from the head of the chain, the next block's
+ * start is advanced by the remainder, and data_len is reduced (never below
+ * zero).
+ *
+ * @param size number of bytes to discard
+ */
+inline void
+ClusterCont::consume(int size)
+{
+  int64_t left = size;
+
+  while (data) {
+    const int64_t avail = data->read_avail();
+    if (left < avail) {
+      break;
+    }
+    left -= avail;
+    data = data->next;
+  }
+  if (data) {
+    data->_start += left;
+  }
+
+  if (data_len > size) {
+    data_len = data_len - size;
+  } else {
+    data_len = 0;
+  }
+}
+
+/**
+ * Copy the first 'len' bytes of the payload chain into 'buf' without
+ * consuming them.
+ *
+ * @param buf destination, at least 'len' bytes large
+ * @param len number of bytes wanted; must not exceed data_len (asserted)
+ * @return number of bytes actually copied (less than 'len' only if the
+ *         chain ran out first)
+ */
+inline int
+ClusterCont::copy_data(char *buf, int len)
+{
+  ink_assert(data_len >= len);
+
+  int64_t remaining = len;
+  if (len > 0) {
+    for (IOBufferBlock *b = data; b; b = b->next) {
+      const int64_t avail = b->read_avail();
+      if (avail > remaining) {
+        // this block holds more than is still needed: take a prefix and stop
+        memcpy(buf, b->_start, remaining);
+        remaining = 0;
+        break;
+      }
+      memcpy(buf, b->_start, avail);
+      buf += avail;
+      remaining -= avail;
+    }
+  }
+  return len - (int) remaining;
+}
+extern ClassAllocator<ClusterCont> clusterContAllocator;
+
+/**
+ * Dispatch the carried message and free this continuation.
+ *
+ * Cache-op requests and results go to their cluster functions; every other
+ * function id is delivered to the action's continuation as an event, with
+ * this object as the data except for CLUSTER_INTERNEL_ERROR, which is
+ * delivered with NULL.
+ *
+ * @return always EVENT_DONE
+ */
+inline int
+ClusterCont::handleEvent(int, void *)
+{
+  if (func_id == CLUSTER_CACHE_OP_CLUSTER_FUNCTION) {
+    cache_op_ClusterFunction(session, context, this);
+  } else if (func_id == CLUSTER_CACHE_OP_RESULT_CLUSTER_FUNCTION) {
+    cache_op_result_ClusterFunction(session, context, this);
+  } else {
+    void *payload = this;
+    if (func_id == CLUSTER_INTERNEL_ERROR) {
+      payload = NULL;
+    }
+    _action.continuation->handleEvent(func_id, payload);
+  }
+
+  // drop every reference before handing the object back to the allocator
+  mutex.clear();
+  _action.mutex.clear();
+  data = NULL;
+
+  clusterContAllocator.free(this);
+  return EVENT_DONE;
+}
+
//
// This continuation handles all cache cluster traffic, on both
// sides (state machine client and cache server)
@@ -89,111 +171,91 @@ struct CacheContinuation;
typedef int (CacheContinuation::*CacheContHandler) (int, void *);
struct CacheContinuation:public Continuation
{
+ static int size_to_init;
enum
{
MagicNo = 0x92183123
};
int magicno;
- void *callback_data;
- void *callback_data_2;
INK_MD5 url_md5;
- Event *timeout;
- Action action;
- ClusterMachine *target_machine;
- int probe_depth;
+
ClusterMachine *past_probes[CONFIGURATION_HISTORY_PROBE_DEPTH];
+
+ ClusterVCToken token;
+
+ CacheHTTPInfo cache_vc_info; // for get_http_info
+// MIOBuffer doc_data;
+ Ptr<IOBufferBlock> doc_data;
+ // Incoming data generated from unmarshaling request/response ops
+ Ptr<IOBufferData> rw_buf_msg;
+ Arena ic_arena;
+ CacheHTTPHdr ic_request; // for lookup or read
+ CacheHTTPInfo ic_old_info; // for update
+ CacheHTTPInfo ic_new_info; // for set_http_info
+
+ ClusterSession cs;
+ char *ic_hostname;
+ int ic_hostname_len;
+
ink_hrtime start_time;
- ClusterMachine *from;
- ClusterHandler *ch;
- VConnection *cache_vc;
+ ClusterMachine *target_machine;
+ int probe_depth;
+
+ CacheVC *cache_vc;
+ Action *pending_action;
bool cache_read;
+ bool request_purge;
+ bool have_all_data; // all object data in response
+ bool expect_next;
+ bool writer_aborted;
int result; // return event code
int result_error; // error code associated with event
- ClusterVCToken token;
- unsigned int seq_number;
uint16_t cfl_flags; // Request flags; see CFL_XXX defines
+
+ unsigned int seq_number;
CacheFragType frag_type;
- int nbytes;
+ int nbytes; // the msg nbytes
unsigned int target_ip;
int request_opcode;
- bool request_purge;
- bool local_lookup_only;
- bool no_reply_message;
- bool request_timeout; // timeout occurred before
- // op complete
- bool expect_cache_callback;
-
- // remove_and_delete() specific data
- bool use_deferred_callback;
-
- // open_read/write data
-
- time_t pin_in_cache;
-
- // setMsgBufferLen(), allocMsgBuffer() and freeMsgBuffer() data
-
- Ptr<IOBufferData> rw_buf_msg;
+ int header_len;
int rw_buf_msg_len;
- // open data
-
- ClusterVConnection *read_cluster_vc;
- ClusterVConnection *write_cluster_vc;
- int cluster_vc_channel;
- ClusterVCToken open_local_token;
-
- // Readahead on open read specific data
-
- int caller_buf_freebytes; // remote bufsize for
- // initial data
- VIO *readahead_vio;
- IOBufferReader *readahead_reader;
- Ptr<IOBufferBlock> readahead_data;
- bool have_all_data; // all object data in response
-
- CacheHTTPInfo cache_vc_info;
- OneWayTunnel *tunnel;
- Ptr<ProxyMutex> tunnel_mutex;
- CacheContinuation *tunnel_cont;
- bool tunnel_closed;
- Action *cache_action;
- Event *lookup_open_write_vc_event;
-
- // Incoming data generated from unmarshaling request/response ops
-
- Arena ic_arena;
- CacheHTTPHdr ic_request;
- CacheHTTPHdr ic_response;
+ time_t pin_in_cache;
+ int64_t doc_size;
+ int64_t total_length;
+ VIO *vio; //
+ IOBufferReader *reader; // for normal read
CacheLookupHttpConfig *ic_params;
- CacheHTTPInfo ic_old_info;
- CacheHTTPInfo ic_new_info;
- Ptr<IOBufferData> ic_hostname;
- int ic_hostname_len;
-
- // debugging
- int cache_op_ClusterFunction;
-
- int lookupEvent(int event, void *d);
- int probeLookupEvent(int event, void *d);
- int remoteOpEvent(int event, Event * e);
- int replyLookupEvent(int event, void *d);
- int replyOpEvent(int event, VConnection * vc);
- int handleReplyEvent(int event, Event * e);
- int callbackEvent(int event, Event * e);
- int setupVCdataRead(int event, VConnection * vc);
- int VCdataRead(int event, VIO * target_vio);
- int setupReadWriteVC(int, VConnection *);
- ClusterVConnection *lookupOpenWriteVC();
- int lookupOpenWriteVCEvent(int, Event *);
- int localVCsetupEvent(int event, ClusterVConnection * vc);
- void insert_cache_callback_user(ClusterVConnection *, int, void *);
- int insertCallbackEvent(int, Event *);
- void callback_user(int result, void *d);
- void defer_callback_result(int result, void *d);
- int callbackResultEvent(int event, Event * e);
- void setupReadBufTunnel(VConnection *, VConnection *);
- int tunnelClosedEvent(int event, void *);
- int remove_and_delete(int, Event *);
+ MIOBuffer *mbuf;
+ EThread *thread;
+
+// int lookupEvent(int event, void *d);
+// int probeLookupEvent(int event, void *d);
+// int remoteOpEvent(int event, Event * e);
+// int replyLookupEvent(int event, void *d);
+ int replyOpEvent();
+// int handleReplyEvent(int event, Event * e);
+// int callbackEvent(int event, Event * e);
+ int setupVCdataRead(int event, void *data);
+ int setupVCdataWrite(int event, void *data);
+ int setupVCdataRemove(int event, void *data);
+ int setupVCdataLink(int event, void *data);
+ int setupVCdataDeref(int event, void *data);
+ int VCdataRead(int event, void *data);
+ int VCdataWrite(int event, void *data);
+ int VCSmallDataRead(int event, void *data);
+// int setupReadWriteVC(int, VConnection *);
+// ClusterVConnection *lookupOpenWriteVC();
+// int lookupOpenWriteVCEvent(int, Event *);
+// int localVCsetupEvent(int event, ClusterVConnection * vc);
+// void insert_cache_callback_user(ClusterVConnection *, int, void *);
+// int insertCallbackEvent(int, Event *);
+// void callback_user(int result, void *d);
+// void defer_callback_result(int result, void *d);
+// int callbackResultEvent(int event, Event * e);
+// void setupReadBufTunnel(VConnection *, VConnection *);
+// int tunnelClosedEvent(int event, void *);
+// int remove_and_delete(int, Event *);
inline void setMsgBufferLen(int l, IOBufferData * b = 0) {
@@ -254,66 +316,26 @@ struct CacheContinuation:public Continuation
if (ic_request.valid()) {
ic_request.clear();
}
- if (ic_response.valid()) {
- ic_response.clear();
- }
+// if (ic_response.valid()) {
+// ic_response.clear();
+// }
if (ic_old_info.valid()) {
ic_old_info.destroy();
}
if (ic_new_info.valid()) {
ic_new_info.destroy();
}
- ic_arena.reset();
+// ic_arena.reset();
freeMsgBuffer();
-
- tunnel_mutex = 0;
- readahead_data = 0;
+//
+// tunnel_mutex = 0;
+// readahead_data = 0;
ic_hostname = 0;
}
-CacheContinuation():
- Continuation(NULL),
- magicno(MagicNo),
- callback_data(0),
- callback_data_2(0),
- timeout(0),
- target_machine(0),
- probe_depth(0),
- start_time(0),
- cache_read(false),
- result(0),
- result_error(0),
- seq_number(0),
- cfl_flags(0),
- frag_type(CACHE_FRAG_TYPE_NONE),
- nbytes(0),
- target_ip(0),
- request_opcode(0),
- request_purge(false),
- local_lookup_only(0),
- no_reply_message(0),
- request_timeout(0),
- expect_cache_callback(true),
- use_deferred_callback(0),
- pin_in_cache(0),
- rw_buf_msg_len(0),
- read_cluster_vc(0),
- write_cluster_vc(0),
- cluster_vc_channel(0),
- caller_buf_freebytes(0),
- readahead_vio(0),
- readahead_reader(0),
- have_all_data(false),
- cache_vc_info(),
- tunnel(0),
- tunnel_cont(0),
- tunnel_closed(0),
- lookup_open_write_vc_event(0),
- ic_arena(),
- ic_request(),
- ic_response(), ic_params(0), ic_old_info(), ic_new_info(), ic_hostname_len(0), cache_op_ClusterFunction(0) {
- token.clear();
- SET_HANDLER((CacheContHandler) & CacheContinuation::remoteOpEvent);
+ CacheContinuation(): magicno(MagicNo) { // sets the magic number, then zero-fills the object from member `cs` to its end
+ size_to_init = sizeof(CacheContinuation) - (size_t) & ((CacheContinuation *) 0)->cs; // byte count from the offset of `cs` to the end of the object; the static is rewritten on every construction
+ memset((char *) &cs, 0, size_to_init); // NOTE(review): assumes every member declared after `cs` may be zeroed byte-wise — confirm before adding members there
}
inline static bool is_ClusterThread(EThread * et)
@@ -334,14 +356,66 @@ CacheContinuation():
static void cacheContAllocator_free(CacheContinuation *);
inkcoreapi static Action *callback_failure(Action *, int, int, CacheContinuation * this_cc = 0);
static Action *do_remote_lookup(Continuation *, CacheKey *, CacheContinuation *, CacheFragType, char *, int);
- inkcoreapi static Action *do_op(Continuation *, ClusterMachine *, void *, int, char *, int,
- int nbytes = -1, MIOBuffer * b = 0);
+ inkcoreapi static Action *do_op(Continuation * c, ClusterSession cs, void *args,
+ int user_opcode, IOBufferData *data, int data_len, int nbytes = -1, MIOBuffer * b = 0);
static int setup_local_vc(char *data, int data_len, CacheContinuation * cc, ClusterMachine * mp, Action **);
static void disposeOfDataBuffer(void *buf);
static int handleDisposeEvent(int event, CacheContinuation * cc);
- static int32_t getObjectSize(VConnection *, int, CacheHTTPInfo *);
+ int32_t getObjectSize(VConnection *, int, CacheHTTPInfo *);
};
+extern ClassAllocator<CacheContinuation> cacheContAllocator;
+
+inline CacheContinuation *
+new_CacheCont(EThread *t) { // Allocates a CacheContinuation from cacheContAllocator, gives it a new mutex and records thread t; returns the object.
+ ink_assert(t == this_ethread()); // t must be the thread this call is running on
+ CacheContinuation *c = cacheContAllocator.alloc();
+ c->mutex = new_ProxyMutex(); // each continuation gets its own newly created mutex
+ c->start_time = ink_get_hrtime(); // timestamp taken at allocation
+ c->thread = t;
+ return c;
+}
+
+inline void
+free_CacheCont(CacheContinuation *c) { // Releases what c holds (pending action, cache VC, buffers, parsed headers) and returns c to cacheContAllocator.
+ ink_assert(c->magicno == (int) c->MagicNo && !c->expect_next); // object must carry the live magic number and have expect_next cleared
+// ink_assert(!c->cache_op_ClusterFunction);
+ if (c->pending_action) { // cancel an action that is still pending
+ c->pending_action->cancel();
+ c->pending_action = NULL;
+ }
+ if (c->cache_vc) { // shut down the cache VC: a READ vio is closed, anything else is aborted
+ if (c->cache_vc->vio.op == VIO::READ)
+ c->cache_vc->do_io(VIO::CLOSE);
+ else
+ c->cache_vc->do_io(VIO::ABORT);
+ c->cache_vc = NULL;
+ }
+ if (c->mbuf) { // release the MIOBuffer if one was attached
+ free_MIOBuffer(c->mbuf);
+ c->mbuf = NULL;
+ }
+
+ c->magicno = -1; // invalidate the magic number so the assert above fails if c is freed again
+ c->token.clear();
+ c->cache_vc_info.clear();
+ if (c->ic_params) { // ic_params is released with delete here
+ delete c->ic_params;
+ c->ic_params = 0;
+ }
+ c->ic_request.clear();
+ c->ic_old_info.clear(); // NOTE(review): ic_old_info is clear()ed while ic_new_info is destroy()ed — confirm the asymmetry is intended
+ c->ic_new_info.destroy();
+ c->ic_arena.reset();
+ c->freeMsgBuffer();
+ c->ic_hostname = 0; // pointer is only reset, nothing is freed here
+ c->mutex.clear();
+
+ c->doc_data = NULL;
+
+ cacheContAllocator.free(c); // c must not be used after this call
+}
+
/////////////////////////////////////////
// Cache OP specific args for do_op() //
/////////////////////////////////////////
@@ -595,18 +669,19 @@ struct CacheOpReplyMsg:public ClusterMessageHeader
{
uint32_t seq_number;
int32_t result;
- ClusterVCToken token;
- bool is_ram_cache_hit; // Entire object was from ram cache
- Alias32 moi; // Used by CACHE_OPEN_READ & CACHE_LINK reply
+ int32_t h_len;
+ int32_t d_len;
+ int32_t reason; // Used by CACHE_OPEN_READ & CACHE_LINK reply
+ int64_t doc_size;
+
enum
{
MIN_VERSION = 1,
MAX_VERSION = 1,
CACHE_OP_REPLY_MESSAGE_VERSION = MAX_VERSION
};
- CacheOpReplyMsg(uint16_t vers = CACHE_OP_REPLY_MESSAGE_VERSION)
- : ClusterMessageHeader(vers), seq_number(0), result(0), is_ram_cache_hit(false) {
- moi.u32 = 0;
+ CacheOpReplyMsg(uint16_t vers = CACHE_OP_REPLY_MESSAGE_VERSION):
+ ClusterMessageHeader(vers), seq_number(0), result(0), h_len(0), d_len(0), reason(0), doc_size(0) {
}
//////////////////////////////////////////////////////////////////////////
@@ -617,7 +692,7 @@ struct CacheOpReplyMsg:public ClusterMessageHeader
}
static int sizeof_fixedlen_msg()
{
- return (int) ALIGN_DOUBLE(offsetof(CacheOpReplyMsg, moi));
+ return INK_ALIGN(sizeof (CacheOpReplyMsg), 16);
}
void init(uint16_t vers = CACHE_OP_REPLY_MESSAGE_VERSION) {
_init(vers);
@@ -627,12 +702,12 @@ struct CacheOpReplyMsg:public ClusterMessageHeader
if (NeedByteSwap()) {
ats_swap32(&seq_number);
ats_swap32((uint32_t *) & result);
- token.SwapBytes();
+ // Swap every multi-byte field of the reply. The header and data lengths
+ // (h_len, d_len) are wire fields too, so they must be converted along
+ // with the others or a peer of opposite endianness reads bogus lengths.
+ ats_swap32((uint32_t *) & h_len);
+ ats_swap32((uint32_t *) & d_len);
+ ats_swap32((uint32_t *) & reason);
+ ats_swap64((uint64_t *) & doc_size);
}
}
//////////////////////////////////////////////////////////////////////////
};
-
inline int
maxval(int a, int b)
{
@@ -795,6 +870,7 @@ event_reply_may_have_moi(int event)
{
switch (event) {
case CACHE_EVENT_OPEN_READ:
+ case CACHE_EVENT_OPEN_WRITE:
case CACHE_EVENT_LINK:
case CACHE_EVENT_LINK_FAILED:
case CACHE_EVENT_OPEN_READ_FAILED:
[5/6] refine the codes of cluster
Posted by we...@apache.org.
http://git-wip-us.apache.org/repos/asf/trafficserver/blob/62504a9f/iocore/cluster/ClusterCache.cc
----------------------------------------------------------------------
diff --git a/iocore/cluster/ClusterCache.cc b/iocore/cluster/ClusterCache.cc
index 8d4b6e5..9b3be6c 100644
--- a/iocore/cluster/ClusterCache.cc
+++ b/iocore/cluster/ClusterCache.cc
@@ -28,6 +28,10 @@
#include "P_Cluster.h"
+//ClassAllocator<ClusterBuffer> clusterBufferAllocator("clusterBufferAllocator");
+
+int CacheContinuation::size_to_init = -1;
+
#ifdef DEBUG
#define CLUSTER_TEST_DEBUG 1
#endif
@@ -53,31 +57,34 @@ int open_delay_events = 0;
// default will be read from config
int cache_migrate_on_demand = false;
-/////////////////
-// Static Data //
-/////////////////
-static ClassAllocator<CacheContinuation> cacheContAllocator("cacheContAllocator");
+ClassAllocator<CacheContinuation> cacheContAllocator("cacheContAllocator");
+ClassAllocator<ClusterCont> clusterContAllocator("clusterContAllocator");
-static Queue<CacheContinuation> remoteCacheContQueue[REMOTE_CONNECT_HASH];
-static Ptr<ProxyMutex> remoteCacheContQueueMutex[REMOTE_CONNECT_HASH];
+//static Queue<CacheContinuation> remoteCacheContQueue[REMOTE_CONNECT_HASH];
+//static Ptr<ProxyMutex> remoteCacheContQueueMutex[REMOTE_CONNECT_HASH];
// 0 is an illegal sequence number
#define CACHE_NO_RESPONSE 0
static int cluster_sequence_number = 1;
#ifdef CLUSTER_TEST_DEBUG
-static ink_hrtime cache_cluster_timeout = HRTIME_SECONDS(65536);
+//static ink_hrtime cache_cluster_timeout = HRTIME_SECONDS(65536);
#else
-static ink_hrtime cache_cluster_timeout = CACHE_CLUSTER_TIMEOUT;
+//static ink_hrtime cache_cluster_timeout = CACHE_CLUSTER_TIMEOUT;
#endif
///////////////////
// Declarations //
///////////////////
-static CacheContinuation *find_cache_continuation(unsigned int, unsigned int);
+//static CacheContinuation *find_cache_continuation(unsigned int, unsigned int);
static unsigned int new_cache_sequence_number();
+#ifdef DEBUG
+int64_t num_of_cachecontinuation = 0;
+int64_t num_of_cluster_cachevc = 0;
+#endif
+
#define DOT_SEPARATED(_x) \
((unsigned char*)&(_x))[0], ((unsigned char*)&(_x))[1], \
((unsigned char*)&(_x))[2], ((unsigned char*)&(_x))[3]
@@ -310,7 +317,7 @@ ClusterVConnectionCache::lookup(INK_MD5 * key)
}
int
-ClusterVConnectionCacheEvent::eventHandler(int /* event ATS_UNUSED */, Event * e)
+ClusterVConnectionCacheEvent::eventHandler(int , Event * e)
{
CLUSTER_INCREMENT_DYN_STAT(CLUSTER_VC_CACHE_SCANS_STAT);
MUTEX_TRY_LOCK(lock, cache->hash_lock[hash_index], this_ethread());
@@ -358,12 +365,12 @@ ClusterVConnectionCacheEvent::eventHandler(int /* event ATS_UNUSED */, Event * e
int
CacheContinuation::init()
{
- int n;
- for (n = 0; n < REMOTE_CONNECT_HASH; ++n)
- remoteCacheContQueueMutex[n] = new_ProxyMutex();
-
- GlobalOpenWriteVCcache = new ClusterVConnectionCache;
- GlobalOpenWriteVCcache->init();
+// int n;
+// for (n = 0; n < REMOTE_CONNECT_HASH; ++n)
+// remoteCacheContQueueMutex[n] = new_ProxyMutex();
+//
+// GlobalOpenWriteVCcache = new ClusterVConnectionCache;
+// GlobalOpenWriteVCcache->init();
return 0;
}
@@ -371,14 +378,269 @@ CacheContinuation::init()
// do_op()
// Main function to do a cluster cache operation
///////////////////////////////////////////////////////////////////////
+//Action *
+//CacheContinuation::do_op(Continuation * c, ClusterMachine * mp, void *args,
+// int user_opcode, char *data, int data_len, int nbytes, MIOBuffer * b)
+//{
+// CacheContinuation *cc = 0;
+// Action *act = 0;
+// char *msg = 0;
+//
+// /////////////////////////////////////////////////////////////////////
+// // Unconditionally map open read buffer interfaces to open read.
+// // open read buffer interfaces are now deprecated.
+// /////////////////////////////////////////////////////////////////////
+// int opcode = user_opcode;
+// switch (opcode) {
+// case CACHE_OPEN_READ_BUFFER:
+// opcode = CACHE_OPEN_READ;
+// break;
+// case CACHE_OPEN_READ_BUFFER_LONG:
+// opcode = CACHE_OPEN_READ_LONG;
+// break;
+// default:
+// break;
+// }
+//
+// if (!ch)
+// goto no_send_exit;
+//
+// if (c) {
+// cc = cacheContAllocator_alloc();
+// cc->ch = ch;
+// cc->target_machine = mp;
+// cc->request_opcode = opcode;
+// cc->mutex = c->mutex;
+// cc->action = c;
+// cc->action.cancelled = false;
+// cc->start_time = ink_get_hrtime();
+// cc->from = mp;
+// cc->result = op_failure(opcode);
+// SET_CONTINUATION_HANDLER(cc, (CacheContHandler)
+// & CacheContinuation::remoteOpEvent);
+// act = &cc->action;
+//
+// // set up sequence number so we can find this continuation
+//
+// cc->target_ip = mp->ip;
+// cc->seq_number = new_cache_sequence_number();
+//
+// // establish timeout for cache op
+//
+// unsigned int hash = FOLDHASH(cc->target_ip, cc->seq_number);
+// MUTEX_TRY_LOCK(queuelock, remoteCacheContQueueMutex[hash], this_ethread());
+// if (!queuelock) {
+//
+// // failed to acquire lock: no problem, retry later
+// cc->timeout = eventProcessor.schedule_in(cc, CACHE_RETRY_PERIOD, ET_CACHE_CONT_SM);
+// } else {
+// remoteCacheContQueue[hash].enqueue(cc);
+// MUTEX_RELEASE(queuelock);
+// cc->timeout = eventProcessor.schedule_in(cc, cache_cluster_timeout, ET_CACHE_CONT_SM);
+// }
+// }
+// //
+// // Determine the type of the "Over The Wire" (OTW) message header and
+// // initialize it.
+// //
+// Debug("cache_msg",
+// "do_op opcode=%d seqno=%d Machine=%p data=%p datalen=%d mio=%p",
+// opcode, (c ? cc->seq_number : CACHE_NO_RESPONSE), mp, data, data_len, b);
+//
+// switch (opcode) {
+// case CACHE_OPEN_WRITE_BUFFER:
+// case CACHE_OPEN_WRITE_BUFFER_LONG:
+// {
+// ink_release_assert(!"write buffer not supported");
+// break;
+// }
+// case CACHE_OPEN_READ_BUFFER:
+// case CACHE_OPEN_READ_BUFFER_LONG:
+// {
+// ink_release_assert(!"read buffer not supported");
+// break;
+// }
+// case CACHE_OPEN_WRITE:
+// case CACHE_OPEN_READ:
+// {
+// ink_release_assert(c > 0);
+// //////////////////////
+// // Use short format //
+// //////////////////////
+// if (!data) {
+// data_len = op_to_sizeof_fixedlen_msg(opcode);
+// data = (char *) ALLOCA_DOUBLE(data_len);
+// }
+// msg = (char *) data;
+// CacheOpMsg_short *m = (CacheOpMsg_short *) msg;
+// m->init();
+// m->opcode = opcode;
+// m->cfl_flags = ((CacheOpArgs_General *) args)->cfl_flags;
+// m->md5 = *((CacheOpArgs_General *) args)->url_md5;
+// cc->url_md5 = m->md5;
+// m->seq_number = (c ? cc->seq_number : CACHE_NO_RESPONSE);
+// m->frag_type = ((CacheOpArgs_General *) args)->frag_type;
+// if (opcode == CACHE_OPEN_WRITE) {
+// m->nbytes = nbytes;
+// m->data = (uint32_t) ((CacheOpArgs_General *) args)->pin_in_cache;
+// } else {
+// m->nbytes = 0;
+// m->data = 0;
+// }
+//
+// if (opcode == CACHE_OPEN_READ) {
+// //
+// // Set upper limit on initial data received with response
+// // for open read response
+// //
+// m->buffer_size = DEFAULT_MAX_BUFFER_SIZE;
+// } else {
+// m->buffer_size = 0;
+// }
+//
+// //
+// // Establish the local VC
+// //
+// int res = setup_local_vc(msg, data_len, cc, mp, &act);
+// if (!res) {
+// /////////////////////////////////////////////////////
+// // Unable to setup local VC, request aborted.
+// // Remove request from pending list and deallocate.
+// /////////////////////////////////////////////////////
+// cc->remove_and_delete(0, (Event *) 0);
+// return act;
+//
+// } else if (res != -1) {
+// ///////////////////////////////////////
+// // VC established, send request
+// ///////////////////////////////////////
+// break;
+//
+// } else {
+// //////////////////////////////////////////////////////
+// // Unable to setup VC, delay required, await callback
+// //////////////////////////////////////////////////////
+// goto no_send_exit;
+// }
+// }
+//
+// case CACHE_OPEN_READ_LONG:
+// case CACHE_OPEN_WRITE_LONG:
+// {
+// ink_release_assert(c > 0);
+// //////////////////////
+// // Use long format //
+// //////////////////////
+// msg = data;
+// CacheOpMsg_long *m = (CacheOpMsg_long *) msg;
+// m->init();
+// m->opcode = opcode;
+// m->cfl_flags = ((CacheOpArgs_General *) args)->cfl_flags;
+// m->url_md5 = *((CacheOpArgs_General *) args)->url_md5;
+// cc->url_md5 = m->url_md5;
+// m->seq_number = (c ? cc->seq_number : CACHE_NO_RESPONSE);
+// m->nbytes = nbytes;
+// m->data = (uint32_t) ((CacheOpArgs_General *) args)->pin_in_cache;
+// m->frag_type = (uint32_t) ((CacheOpArgs_General *) args)->frag_type;
+//
+// if (opcode == CACHE_OPEN_READ_LONG) {
+// //
+// // Set upper limit on initial data received with response
+// // for open read response
+// //
+// m->buffer_size = DEFAULT_MAX_BUFFER_SIZE;
+// } else {
+// m->buffer_size = 0;
+// }
+// //
+// // Establish the local VC
+// //
+// int res = setup_local_vc(msg, data_len, cc, mp, &act);
+// if (!res) {
+// /////////////////////////////////////////////////////
+// // Unable to setup local VC, request aborted.
+// // Remove request from pending list and deallocate.
+// /////////////////////////////////////////////////////
+// cc->remove_and_delete(0, (Event *) 0);
+// return act;
+//
+// } else if (res != -1) {
+// ///////////////////////////////////////
+// // VC established, send request
+// ///////////////////////////////////////
+// break;
+//
+// } else {
+// //////////////////////////////////////////////////////
+// // Unable to setup VC, delay required, await callback
+// //////////////////////////////////////////////////////
+// goto no_send_exit;
+// }
+// }
+// case CACHE_UPDATE:
+// case CACHE_REMOVE:
+// case CACHE_DEREF:
+// {
+// //////////////////////
+// // Use short format //
+// //////////////////////
+// msg = data;
+// CacheOpMsg_short *m = (CacheOpMsg_short *) msg;
+// m->init();
+// m->opcode = opcode;
+// m->frag_type = ((CacheOpArgs_Deref *) args)->frag_type;
+// m->cfl_flags = ((CacheOpArgs_Deref *) args)->cfl_flags;
+// if (opcode == CACHE_DEREF)
+// m->md5 = *((CacheOpArgs_Deref *) args)->md5;
+// else
+// m->md5 = *((CacheOpArgs_General *) args)->url_md5;
+// m->seq_number = (c ? cc->seq_number : CACHE_NO_RESPONSE);
+// break;
+// }
+// case CACHE_LINK:
+// {
+// ////////////////////////
+// // Use short_2 format //
+// ////////////////////////
+// msg = data;
+// CacheOpMsg_short_2 *m = (CacheOpMsg_short_2 *) msg;
+// m->init();
+// m->opcode = opcode;
+// m->cfl_flags = ((CacheOpArgs_Link *) args)->cfl_flags;
+// m->md5_1 = *((CacheOpArgs_Link *) args)->from;
+// m->md5_2 = *((CacheOpArgs_Link *) args)->to;
+// m->seq_number = (c ? cc->seq_number : CACHE_NO_RESPONSE);
+// m->frag_type = ((CacheOpArgs_Link *) args)->frag_type;
+// break;
+// }
+// default:
+// msg = 0;
+// break;
+// }
+//#ifdef CACHE_MSG_TRACE
+// log_cache_op_sndmsg((c ? cc->seq_number : CACHE_NO_RESPONSE), 0, "do_op");
+//#endif
+// clusterProcessor.invoke_remote(ch,
+// op_needs_marshalled_coi(opcode) ? CACHE_OP_MALLOCED_CLUSTER_FUNCTION
+// : CACHE_OP_CLUSTER_FUNCTION, (char *) msg, data_len);
+//
+//no_send_exit:
+// if (c) {
+// return act;
+// } else {
+// return (Action *) 0;
+// }
+//}
+
+
Action *
-CacheContinuation::do_op(Continuation * c, ClusterMachine * mp, void *args,
- int user_opcode, char *data, int data_len, int nbytes, MIOBuffer * b)
+CacheContinuation::do_op(Continuation * c, ClusterSession cs, void *args,
+ int user_opcode, IOBufferData *data, int data_len, int nbytes, MIOBuffer * b)
{
- CacheContinuation *cc = 0;
- Action *act = 0;
- char *msg = 0;
- ClusterHandler *ch = mp->pop_ClusterHandler();
+ ink_assert(data && !b);
+
+ ClusterCacheVC *ccvc = 0;
+ char *msg = data->data();
/////////////////////////////////////////////////////////////////////
// Unconditionally map open read buffer interfaces to open read.
@@ -396,50 +658,29 @@ CacheContinuation::do_op(Continuation * c, ClusterMachine * mp, void *args,
break;
}
- if (!ch)
- goto no_send_exit;
-
if (c) {
- cc = cacheContAllocator_alloc();
- cc->ch = ch;
- cc->target_machine = mp;
- cc->request_opcode = opcode;
- cc->mutex = c->mutex;
- cc->action = c;
- cc->action.cancelled = false;
- cc->start_time = ink_get_hrtime();
- cc->from = mp;
- cc->result = op_failure(opcode);
- SET_CONTINUATION_HANDLER(cc, (CacheContHandler)
- & CacheContinuation::remoteOpEvent);
- act = &cc->action;
-
- // set up sequence number so we can find this continuation
-
- cc->target_ip = mp->ip;
- cc->seq_number = new_cache_sequence_number();
-
- // establish timeout for cache op
-
- unsigned int hash = FOLDHASH(cc->target_ip, cc->seq_number);
- MUTEX_TRY_LOCK(queuelock, remoteCacheContQueueMutex[hash], this_ethread());
- if (!queuelock) {
-
- // failed to acquire lock: no problem, retry later
- cc->timeout = eventProcessor.schedule_in(cc, CACHE_RETRY_PERIOD, ET_CACHE_CONT_SM);
- } else {
- remoteCacheContQueue[hash].enqueue(cc);
- MUTEX_RELEASE(queuelock);
- cc->timeout = eventProcessor.schedule_in(cc, cache_cluster_timeout, ET_CACHE_CONT_SM);
+ ccvc = new_ClusterCacheVC(c);
+
+ if (opcode == CACHE_OPEN_READ || opcode == CACHE_OPEN_READ_LONG) {
+ SET_CONTINUATION_HANDLER(ccvc, &ClusterCacheVC::openReadStart);
+ ccvc->vio.op = VIO::READ;
+ ccvc->frag_type = ((CacheOpArgs_General *) args)->frag_type;
+ } else if (opcode == CACHE_OPEN_WRITE || opcode == CACHE_OPEN_WRITE_LONG) {
+ SET_CONTINUATION_HANDLER(ccvc, &ClusterCacheVC::openWriteStart);
+ ccvc->vio.op = VIO::WRITE;
+ ccvc->frag_type = ((CacheOpArgs_General *) args)->frag_type;
+ } else if (opcode == CACHE_REMOVE) {
+ SET_CONTINUATION_HANDLER(ccvc, &ClusterCacheVC::removeEvent);
+ ccvc->frag_type = ((CacheOpArgs_General *) args)->frag_type;
}
+
+ cluster_bind_session(cs, ccvc);
+ ccvc->cs = cs;
}
- //
- // Determine the type of the "Over The Wire" (OTW) message header and
- // initialize it.
- //
+
Debug("cache_msg",
- "do_op opcode=%d seqno=%d Machine=%p data=%p datalen=%d mio=%p",
- opcode, (c ? cc->seq_number : CACHE_NO_RESPONSE), mp, data, data_len, b);
+ "do_op opcode=%d data=%p datalen=%d mio=%p",
+ opcode, data, data_len, b);
switch (opcode) {
case CACHE_OPEN_WRITE_BUFFER:
@@ -461,61 +702,26 @@ CacheContinuation::do_op(Continuation * c, ClusterMachine * mp, void *args,
//////////////////////
// Use short format //
//////////////////////
- if (!data) {
- data_len = op_to_sizeof_fixedlen_msg(opcode);
- data = (char *) ALLOCA_DOUBLE(data_len);
- }
- msg = (char *) data;
CacheOpMsg_short *m = (CacheOpMsg_short *) msg;
m->init();
m->opcode = opcode;
m->cfl_flags = ((CacheOpArgs_General *) args)->cfl_flags;
m->md5 = *((CacheOpArgs_General *) args)->url_md5;
- cc->url_md5 = m->md5;
- m->seq_number = (c ? cc->seq_number : CACHE_NO_RESPONSE);
+ //cc->url_md5 = m->md5;
+ m->seq_number = new_cache_sequence_number();
m->frag_type = ((CacheOpArgs_General *) args)->frag_type;
if (opcode == CACHE_OPEN_WRITE) {
m->nbytes = nbytes;
m->data = (uint32_t) ((CacheOpArgs_General *) args)->pin_in_cache;
+ ink_assert(ccvc);
+ ccvc->time_pin = ((CacheOpArgs_General *) args)->pin_in_cache;
} else {
m->nbytes = 0;
m->data = 0;
}
- if (opcode == CACHE_OPEN_READ) {
- //
- // Set upper limit on initial data received with response
- // for open read response
- //
- m->buffer_size = DEFAULT_MAX_BUFFER_SIZE;
- } else {
- m->buffer_size = 0;
- }
-
- //
- // Establish the local VC
- //
- int res = setup_local_vc(msg, data_len, cc, mp, &act);
- if (!res) {
- /////////////////////////////////////////////////////
- // Unable to setup local VC, request aborted.
- // Remove request from pending list and deallocate.
- /////////////////////////////////////////////////////
- cc->remove_and_delete(0, (Event *) 0);
- return act;
-
- } else if (res != -1) {
- ///////////////////////////////////////
- // VC established, send request
- ///////////////////////////////////////
- break;
-
- } else {
- //////////////////////////////////////////////////////
- // Unable to setup VC, delay required, await callback
- //////////////////////////////////////////////////////
- goto no_send_exit;
- }
+ m->buffer_size = 0;
+ break;
}
case CACHE_OPEN_READ_LONG:
@@ -525,51 +731,21 @@ CacheContinuation::do_op(Continuation * c, ClusterMachine * mp, void *args,
//////////////////////
// Use long format //
//////////////////////
- msg = data;
CacheOpMsg_long *m = (CacheOpMsg_long *) msg;
m->init();
m->opcode = opcode;
m->cfl_flags = ((CacheOpArgs_General *) args)->cfl_flags;
m->url_md5 = *((CacheOpArgs_General *) args)->url_md5;
- cc->url_md5 = m->url_md5;
- m->seq_number = (c ? cc->seq_number : CACHE_NO_RESPONSE);
+ //cc->url_md5 = m->url_md5;
+ m->seq_number = new_cache_sequence_number();
m->nbytes = nbytes;
m->data = (uint32_t) ((CacheOpArgs_General *) args)->pin_in_cache;
+ ink_assert(ccvc);
+ ccvc->time_pin = (uint32_t) ((CacheOpArgs_General *) args)->pin_in_cache;
m->frag_type = (uint32_t) ((CacheOpArgs_General *) args)->frag_type;
- if (opcode == CACHE_OPEN_READ_LONG) {
- //
- // Set upper limit on initial data received with response
- // for open read response
- //
- m->buffer_size = DEFAULT_MAX_BUFFER_SIZE;
- } else {
- m->buffer_size = 0;
- }
- //
- // Establish the local VC
- //
- int res = setup_local_vc(msg, data_len, cc, mp, &act);
- if (!res) {
- /////////////////////////////////////////////////////
- // Unable to setup local VC, request aborted.
- // Remove request from pending list and deallocate.
- /////////////////////////////////////////////////////
- cc->remove_and_delete(0, (Event *) 0);
- return act;
-
- } else if (res != -1) {
- ///////////////////////////////////////
- // VC established, send request
- ///////////////////////////////////////
- break;
-
- } else {
- //////////////////////////////////////////////////////
- // Unable to setup VC, delay required, await callback
- //////////////////////////////////////////////////////
- goto no_send_exit;
- }
+ m->buffer_size = 0;
+ break;
}
case CACHE_UPDATE:
case CACHE_REMOVE:
@@ -578,7 +754,6 @@ CacheContinuation::do_op(Continuation * c, ClusterMachine * mp, void *args,
//////////////////////
// Use short format //
//////////////////////
- msg = data;
CacheOpMsg_short *m = (CacheOpMsg_short *) msg;
m->init();
m->opcode = opcode;
@@ -588,7 +763,7 @@ CacheContinuation::do_op(Continuation * c, ClusterMachine * mp, void *args,
m->md5 = *((CacheOpArgs_Deref *) args)->md5;
else
m->md5 = *((CacheOpArgs_General *) args)->url_md5;
- m->seq_number = (c ? cc->seq_number : CACHE_NO_RESPONSE);
+ m->seq_number = new_cache_sequence_number();
break;
}
case CACHE_LINK:
@@ -596,369 +771,375 @@ CacheContinuation::do_op(Continuation * c, ClusterMachine * mp, void *args,
////////////////////////
// Use short_2 format //
////////////////////////
- msg = data;
CacheOpMsg_short_2 *m = (CacheOpMsg_short_2 *) msg;
m->init();
m->opcode = opcode;
m->cfl_flags = ((CacheOpArgs_Link *) args)->cfl_flags;
m->md5_1 = *((CacheOpArgs_Link *) args)->from;
m->md5_2 = *((CacheOpArgs_Link *) args)->to;
- m->seq_number = (c ? cc->seq_number : CACHE_NO_RESPONSE);
+ m->seq_number = new_cache_sequence_number();
m->frag_type = ((CacheOpArgs_Link *) args)->frag_type;
break;
}
default:
- msg = 0;
+ ink_release_assert(!"error request_op");
break;
}
#ifdef CACHE_MSG_TRACE
log_cache_op_sndmsg((c ? cc->seq_number : CACHE_NO_RESPONSE), 0, "do_op");
#endif
- clusterProcessor.invoke_remote(ch,
- op_needs_marshalled_coi(opcode) ? CACHE_OP_MALLOCED_CLUSTER_FUNCTION
- : CACHE_OP_CLUSTER_FUNCTION, (char *) msg, data_len);
-
-no_send_exit:
- if (c) {
- return act;
- } else {
- return (Action *) 0;
- }
-}
-
-int
-CacheContinuation::setup_local_vc(char *data, int data_len, CacheContinuation * cc, ClusterMachine * mp, Action ** act)
-{
- bool read_op = op_is_read(cc->request_opcode);
- bool short_msg = op_is_shortform(cc->request_opcode);
-
- // Alloc buffer, copy message and attach to continuation
- cc->setMsgBufferLen(data_len);
- cc->allocMsgBuffer();
- memcpy(cc->getMsgBuffer(), data, data_len);
-
- SET_CONTINUATION_HANDLER(cc, (CacheContHandler)
- & CacheContinuation::localVCsetupEvent);
-
- if (short_msg) {
- Debug("cache_proto", "open_local-s (%s) seqno=%d", (read_op ? "R" : "W"), ((CacheOpMsg_short *) data)->seq_number);
- } else {
- Debug("cache_proto", "open_local-l (%s) seqno=%d", (read_op ? "R" : "W"), ((CacheOpMsg_long *) data)->seq_number);
- }
- // Create local VC
- ClusterVConnection *vc;
+ IOBufferBlock *ret = new_IOBufferBlock(data, data_len, 0);
+ ret->_buf_end = ret->_end;
- if (!read_op && (cc->request_opcode == CACHE_OPEN_WRITE_LONG)) {
- // Determine if the open_write has already been established.
- vc = cc->lookupOpenWriteVC();
-
- } else {
- vc = clusterProcessor.open_local(cc, mp, cc->open_local_token,
- (CLUSTER_OPT_ALLOW_IMMEDIATE |
- (read_op ? CLUSTER_OPT_CONN_READ : CLUSTER_OPT_CONN_WRITE)));
- }
- if (!vc) {
- // Error, abort request
- if (short_msg) {
- Debug("cache_proto", "0open_local-s (%s) failed, seqno=%d",
- (read_op ? "R" : "W"), ((CacheOpMsg_short *) data)->seq_number);
- } else {
- Debug("cache_proto", "1open_local-l (%s) failed, seqno=%d",
- (read_op ? "R" : "W"), ((CacheOpMsg_long *) data)->seq_number);
- }
- cc->freeMsgBuffer();
- if (cc->timeout)
- cc->timeout->cancel();
- cc->timeout = NULL;
-
- // Post async failure callback on a different continuation.
- *act = callback_failure(&cc->action, (read_op ? CACHE_EVENT_OPEN_READ_FAILED : CACHE_EVENT_OPEN_WRITE_FAILED), 0);
- return 0;
-
- } else if (vc != CLUSTER_DELAYED_OPEN) {
- // We have established the VC
- if (read_op) {
- cc->read_cluster_vc = vc;
- } else {
- cc->write_cluster_vc = vc;
- }
- cc->cluster_vc_channel = vc->channel;
- vc->current_cont = cc;
-
- if (short_msg) {
- CacheOpMsg_short *ms = (CacheOpMsg_short *) data;
- ms->channel = vc->channel;
- ms->token = cc->open_local_token;
- Debug("cache_proto",
- "0open_local-s (%s) success, seqno=%d chan=%d token=%d,%d VC=%p",
- (read_op ? "R" : "W"), ms->seq_number, vc->channel, ms->token.ip_created, ms->token.sequence_number, vc);
- } else {
- CacheOpMsg_long *ml = (CacheOpMsg_long *) data;
- ml->channel = vc->channel;
- ml->token = cc->open_local_token;
- Debug("cache_proto",
- "1open_local-l (%s) success, seqno=%d chan=%d token=%d,%d VC=%p",
- (read_op ? "R" : "W"), ml->seq_number, vc->channel, ml->token.ip_created, ml->token.sequence_number, vc);
- }
- cc->freeMsgBuffer();
- SET_CONTINUATION_HANDLER(cc, (CacheContHandler)
- & CacheContinuation::remoteOpEvent);
- return 1;
-
- } else {
- //////////////////////////////////////////////////////
- // Unable to setup VC, delay required, await callback
- //////////////////////////////////////////////////////
- return -1;
- }
-}
-
-ClusterVConnection *
-CacheContinuation::lookupOpenWriteVC()
-{
- ///////////////////////////////////////////////////////////////
- // See if we already have an open_write ClusterVConnection
- // which was established in a previous remote open_read which
- // failed.
- ///////////////////////////////////////////////////////////////
- ClusterVConnection *vc;
- CacheOpMsg_long *ml = (CacheOpMsg_long *) getMsgBuffer();
-
- vc = GlobalOpenWriteVCcache->lookup(&ml->url_md5);
-
- if (vc == ((ClusterVConnection *) 0)) {
- // Retry lookup
- SET_CONTINUATION_HANDLER(this, (CacheContHandler)
- & CacheContinuation::lookupOpenWriteVCEvent);
- //
- // Note: In the lookupOpenWriteVCEvent handler, we use EVENT_IMMEDIATE
- // to distinguish the lookup retry from a request timeout
- // which uses EVENT_INTERVAL.
- //
- lookup_open_write_vc_event = eventProcessor.schedule_imm(this, ET_CACHE_CONT_SM);
-
- } else if (vc != ((ClusterVConnection *) - 1)) {
- // Hit, found open_write VC in cache.
- // Post open_write completion by simulating a
- // remote cache op result message.
-
- vc->action_ = action; // establish new continuation
-
- SET_CONTINUATION_HANDLER(this, (CacheContHandler)
- & CacheContinuation::localVCsetupEvent);
- this->handleEvent(CLUSTER_EVENT_OPEN_EXISTS, vc);
-
- CacheOpReplyMsg msg;
- int msglen;
-
- msglen = CacheOpReplyMsg::sizeof_fixedlen_msg();
- msg.result = CACHE_EVENT_OPEN_WRITE;
- msg.seq_number = seq_number;
- msg.token = vc->token;
-
- cache_op_result_ClusterFunction(ch, (void *) &msg, msglen);
-
- } else {
- // Miss, establish local VC and send remote open_write request
-
- SET_CONTINUATION_HANDLER(this, (CacheContHandler)
- & CacheContinuation::localVCsetupEvent);
- vc = clusterProcessor.open_local(this, from, open_local_token,
- (CLUSTER_OPT_ALLOW_IMMEDIATE | CLUSTER_OPT_CONN_WRITE));
- if (!vc) {
- this->handleEvent(CLUSTER_EVENT_OPEN_FAILED, 0);
-
- } else if (vc != CLUSTER_DELAYED_OPEN) {
- this->handleEvent(CLUSTER_EVENT_OPEN, vc);
- }
- }
- return CLUSTER_DELAYED_OPEN; // force completion in callback
-}
-
-int
-CacheContinuation::lookupOpenWriteVCEvent(int event, Event * e)
-{
- if (event == EVENT_IMMEDIATE) {
- // Retry open_write VC lookup
- lookupOpenWriteVC();
-
- } else {
- lookup_open_write_vc_event->cancel();
- SET_CONTINUATION_HANDLER(this, (CacheContHandler)
- & CacheContinuation::localVCsetupEvent);
- this->handleEvent(event, e);
- }
- return EVENT_DONE;
-}
-
-int
-CacheContinuation::remove_and_delete(int /* event ATS_UNUSED */, Event * e)
-{
- unsigned int hash = FOLDHASH(target_ip, seq_number);
- MUTEX_TRY_LOCK(queuelock, remoteCacheContQueueMutex[hash], this_ethread());
- if (queuelock) {
- if (remoteCacheContQueue[hash].in(this)) {
- remoteCacheContQueue[hash].remove(this);
- }
- MUTEX_RELEASE(queuelock);
- if (use_deferred_callback)
- callback_failure(&action, result, result_error, this);
- else
- cacheContAllocator_free(this);
-
- } else {
- SET_HANDLER((CacheContHandler) & CacheContinuation::remove_and_delete);
- if (!e) {
- timeout = eventProcessor.schedule_in(this, cache_cluster_timeout, ET_CACHE_CONT_SM);
- } else {
- e->schedule_in(cache_cluster_timeout);
+ if (!cluster_send_message(cs, CLUSTER_CACHE_OP_CLUSTER_FUNCTION, ret, -1, PRIORITY_HIGH)) {
+ if (ccvc) {
+ ccvc->in_progress = true;
+ cluster_set_events(cs, RESPONSE_EVENT_NOTIFY_DEALER);
+ return &ccvc->_action;
}
}
- return EVENT_DONE;
-}
-
-int
-CacheContinuation::localVCsetupEvent(int event, ClusterVConnection * vc)
-{
- ink_assert(magicno == (int) MagicNo);
- ink_assert(getMsgBuffer());
- bool short_msg = op_is_shortform(request_opcode);
- bool read_op = op_is_read(request_opcode);
-
- if (event == EVENT_INTERVAL) {
- Event *e = (Event *) vc;
- unsigned int hash = FOLDHASH(target_ip, seq_number);
-
- MUTEX_TRY_LOCK(queuelock, remoteCacheContQueueMutex[hash], e->ethread);
- if (!queuelock) {
- e->schedule_in(CACHE_RETRY_PERIOD);
- return EVENT_CONT;
- }
-
- if (!remoteCacheContQueue[hash].in(this)) {
- ////////////////////////////////////////////////////
- // Not yet queued on outstanding operations list
- ////////////////////////////////////////////////////
- remoteCacheContQueue[hash].enqueue(this);
- ink_assert(timeout == e);
- MUTEX_RELEASE(queuelock);
- e->schedule_in(cache_cluster_timeout);
- return EVENT_CONT;
-
- } else {
- /////////////////////////////////////////////////////
- // Timeout occurred
- /////////////////////////////////////////////////////
- remoteCacheContQueue[hash].remove(this);
- MUTEX_RELEASE(queuelock);
- Debug("cluster_timeout", "0cluster op timeout %d", seq_number);
- CLUSTER_INCREMENT_DYN_STAT(CLUSTER_REMOTE_OP_TIMEOUTS_STAT);
- timeout = (Event *) 1; // Note timeout
- /////////////////////////////////////////////////////////////////
- // Note: Failure callback is sent now, but the deallocation of
- // the CacheContinuation is deferred until we receive the
- // open_local() callback.
- /////////////////////////////////////////////////////////////////
- if (!action.cancelled)
- action.continuation->handleEvent((read_op ? CACHE_EVENT_OPEN_READ_FAILED : CACHE_EVENT_OPEN_WRITE_FAILED), 0);
- return EVENT_DONE;
- }
-
- } else if (((event == CLUSTER_EVENT_OPEN) || (event == CLUSTER_EVENT_OPEN_EXISTS))
- && (((ptrdiff_t) timeout & (ptrdiff_t) 1) == 0)) {
- ink_hrtime now;
- now = ink_get_hrtime();
- CLUSTER_SUM_DYN_STAT(CLUSTER_OPEN_DELAY_TIME_STAT, now - start_time);
- LOG_EVENT_TIME(start_time, open_delay_time_dist, open_delay_events);
- if (read_op) {
- read_cluster_vc = vc;
- } else {
- write_cluster_vc = vc;
- }
- cluster_vc_channel = vc->channel;
- vc->current_cont = this;
-
- if (short_msg) {
- CacheOpMsg_short *ms = (CacheOpMsg_short *) getMsgBuffer();
- ms->channel = vc->channel;
- ms->token = open_local_token;
-
- Debug("cache_proto",
- "2open_local-s (%s) success, seqno=%d chan=%d token=%d,%d VC=%p",
- (read_op ? "R" : "W"), ms->seq_number, vc->channel, ms->token.ip_created, ms->token.sequence_number, vc);
-
- } else {
- CacheOpMsg_long *ml = (CacheOpMsg_long *) getMsgBuffer();
- ml->channel = vc->channel;
- ml->token = open_local_token;
-
- Debug("cache_proto",
- "3open_local-l (%s) success, seqno=%d chan=%d token=%d,%d VC=%p",
- (read_op ? "R" : "W"), ml->seq_number, vc->channel, ml->token.ip_created, ml->token.sequence_number, vc);
- }
- SET_HANDLER((CacheContHandler) & CacheContinuation::remoteOpEvent);
-
- if (event != CLUSTER_EVENT_OPEN_EXISTS) {
- // Send request message
- clusterProcessor.invoke_remote(ch,
- (op_needs_marshalled_coi(request_opcode) ?
- CACHE_OP_MALLOCED_CLUSTER_FUNCTION :
- CACHE_OP_CLUSTER_FUNCTION), (char *) getMsgBuffer(), getMsgBufferLen());
- }
-
- } else {
- int send_failure_callback = 1;
-
- if (((ptrdiff_t) timeout & (ptrdiff_t) 1) == 0) {
- if (short_msg) {
- Debug("cache_proto", "2open_local-s (%s) failed, seqno=%d",
- (read_op ? "R" : "W"), ((CacheOpMsg_short *) getMsgBuffer())->seq_number);
- } else {
- Debug("cache_proto", "3open_local-l (%s) failed, seqno=%d",
- (read_op ? "R" : "W"), ((CacheOpMsg_long *) getMsgBuffer())->seq_number);
- }
-
- } else {
- Debug("cache_proto", "4open_local cancelled due to timeout, seqno=%d", seq_number);
- this->timeout = 0;
-
- // Deallocate VC if successfully acquired
-
- if (event == CLUSTER_EVENT_OPEN) {
- vc->pending_remote_fill = 0;
- vc->remote_closed = 1; // avoid remote close msg
- vc->do_io(VIO::CLOSE);
- }
- send_failure_callback = 0; // already sent.
- }
-
- if (this->timeout)
- this->timeout->cancel();
- this->timeout = NULL;
-
- freeMsgBuffer();
- if (send_failure_callback) {
- //
- // Action corresponding to "this" already sent back to user,
- // use "this" to establish the failure callback after
- // removing ourselves from the active list.
- //
- this->use_deferred_callback = true;
- this->result = (read_op ? CACHE_EVENT_OPEN_READ_FAILED : CACHE_EVENT_OPEN_WRITE_FAILED);
- this->result_error = 0;
- remove_and_delete(0, (Event *) 0);
-
- } else {
- cacheContAllocator_free(this);
- }
- return EVENT_DONE;
+ cluster_close_session(cs);
+ if (ccvc) {
+ ccvc->session_closed = true;
+ free_ClusterCacheVC(ccvc);
}
- // Free message
- freeMsgBuffer();
-
- return EVENT_DONE;
+ return 0;
}
+//int
+//CacheContinuation::setup_local_vc(char *data, int data_len, CacheContinuation * cc, ClusterMachine * mp, Action ** act)
+//{
+// bool read_op = op_is_read(cc->request_opcode);
+// bool short_msg = op_is_shortform(cc->request_opcode);
+//
+// // Alloc buffer, copy message and attach to continuation
+// cc->setMsgBufferLen(data_len);
+// cc->allocMsgBuffer();
+// memcpy(cc->getMsgBuffer(), data, data_len);
+//
+// SET_CONTINUATION_HANDLER(cc, (CacheContHandler)
+// & CacheContinuation::localVCsetupEvent);
+//
+// if (short_msg) {
+// Debug("cache_proto", "open_local-s (%s) seqno=%d", (read_op ? "R" : "W"), ((CacheOpMsg_short *) data)->seq_number);
+// } else {
+// Debug("cache_proto", "open_local-l (%s) seqno=%d", (read_op ? "R" : "W"), ((CacheOpMsg_long *) data)->seq_number);
+// }
+//
+// // Create local VC
+// ClusterVConnection *vc;
+//
+// if (!read_op && (cc->request_opcode == CACHE_OPEN_WRITE_LONG)) {
+// // Determine if the open_write has already been established.
+// vc = cc->lookupOpenWriteVC();
+//
+// } else {
+// vc = clusterProcessor.open_local(cc, mp, cc->open_local_token,
+// (CLUSTER_OPT_ALLOW_IMMEDIATE |
+// (read_op ? CLUSTER_OPT_CONN_READ : CLUSTER_OPT_CONN_WRITE)));
+// }
+// if (!vc) {
+// // Error, abort request
+// if (short_msg) {
+// Debug("cache_proto", "0open_local-s (%s) failed, seqno=%d",
+// (read_op ? "R" : "W"), ((CacheOpMsg_short *) data)->seq_number);
+// } else {
+// Debug("cache_proto", "1open_local-l (%s) failed, seqno=%d",
+// (read_op ? "R" : "W"), ((CacheOpMsg_long *) data)->seq_number);
+// }
+// cc->freeMsgBuffer();
+// if (cc->timeout)
+// cc->timeout->cancel();
+// cc->timeout = NULL;
+//
+// // Post async failure callback on a different continuation.
+// *act = callback_failure(&cc->action, (read_op ? CACHE_EVENT_OPEN_READ_FAILED : CACHE_EVENT_OPEN_WRITE_FAILED), 0);
+// return 0;
+//
+// } else if (vc != CLUSTER_DELAYED_OPEN) {
+// // We have established the VC
+// if (read_op) {
+// cc->read_cluster_vc = vc;
+// } else {
+// cc->write_cluster_vc = vc;
+// }
+// cc->cluster_vc_channel = vc->channel;
+// vc->current_cont = cc;
+//
+// if (short_msg) {
+// CacheOpMsg_short *ms = (CacheOpMsg_short *) data;
+// ms->channel = vc->channel;
+// ms->token = cc->open_local_token;
+// Debug("cache_proto",
+// "0open_local-s (%s) success, seqno=%d chan=%d token=%d,%d VC=%p",
+// (read_op ? "R" : "W"), ms->seq_number, vc->channel, ms->token.ip_created, ms->token.sequence_number, vc);
+// } else {
+// CacheOpMsg_long *ml = (CacheOpMsg_long *) data;
+// ml->channel = vc->channel;
+// ml->token = cc->open_local_token;
+// Debug("cache_proto",
+// "1open_local-l (%s) success, seqno=%d chan=%d token=%d,%d VC=%p",
+// (read_op ? "R" : "W"), ml->seq_number, vc->channel, ml->token.ip_created, ml->token.sequence_number, vc);
+// }
+// cc->freeMsgBuffer();
+// SET_CONTINUATION_HANDLER(cc, (CacheContHandler)
+// & CacheContinuation::remoteOpEvent);
+// return 1;
+//
+// } else {
+// //////////////////////////////////////////////////////
+// // Unable to setup VC, delay required, await callback
+// //////////////////////////////////////////////////////
+// return -1;
+// }
+//}
+//
+//ClusterVConnection *
+//CacheContinuation::lookupOpenWriteVC()
+//{
+// ///////////////////////////////////////////////////////////////
+// // See if we already have an open_write ClusterVConnection
+// // which was established in a previous remote open_read which
+// // failed.
+// ///////////////////////////////////////////////////////////////
+// ClusterVConnection *vc;
+// CacheOpMsg_long *ml = (CacheOpMsg_long *) getMsgBuffer();
+//
+// vc = GlobalOpenWriteVCcache->lookup(&ml->url_md5);
+//
+// if (vc == ((ClusterVConnection *) 0)) {
+// // Retry lookup
+// SET_CONTINUATION_HANDLER(this, (CacheContHandler)
+// & CacheContinuation::lookupOpenWriteVCEvent);
+// //
+// // Note: In the lookupOpenWriteVCEvent handler, we use EVENT_IMMEDIATE
+// // to distinguish the lookup retry from a request timeout
+// // which uses EVENT_INTERVAL.
+// //
+// lookup_open_write_vc_event = eventProcessor.schedule_imm(this, ET_CACHE_CONT_SM);
+//
+// } else if (vc != ((ClusterVConnection *) - 1)) {
+// // Hit, found open_write VC in cache.
+// // Post open_write completion by simulating a
+// // remote cache op result message.
+//
+// vc->action_ = action; // establish new continuation
+//
+// SET_CONTINUATION_HANDLER(this, (CacheContHandler)
+// & CacheContinuation::localVCsetupEvent);
+// this->handleEvent(CLUSTER_EVENT_OPEN_EXISTS, vc);
+//
+// CacheOpReplyMsg msg;
+// int msglen;
+//
+// msglen = CacheOpReplyMsg::sizeof_fixedlen_msg();
+// msg.result = CACHE_EVENT_OPEN_WRITE;
+// msg.seq_number = seq_number;
+// msg.token = vc->token;
+//
+// cache_op_result_ClusterFunction(ch, (void *) &msg, msglen);
+//
+// } else {
+// // Miss, establish local VC and send remote open_write request
+//
+// SET_CONTINUATION_HANDLER(this, (CacheContHandler)
+// & CacheContinuation::localVCsetupEvent);
+// vc = clusterProcessor.open_local(this, from, open_local_token,
+// (CLUSTER_OPT_ALLOW_IMMEDIATE | CLUSTER_OPT_CONN_WRITE));
+// if (!vc) {
+// this->handleEvent(CLUSTER_EVENT_OPEN_FAILED, 0);
+//
+// } else if (vc != CLUSTER_DELAYED_OPEN) {
+// this->handleEvent(CLUSTER_EVENT_OPEN, vc);
+// }
+// }
+// return CLUSTER_DELAYED_OPEN; // force completion in callback
+//}
+//
+//int
+//CacheContinuation::lookupOpenWriteVCEvent(int event, Event * e)
+//{
+// if (event == EVENT_IMMEDIATE) {
+// // Retry open_write VC lookup
+// lookupOpenWriteVC();
+//
+// } else {
+// lookup_open_write_vc_event->cancel();
+// SET_CONTINUATION_HANDLER(this, (CacheContHandler)
+// & CacheContinuation::localVCsetupEvent);
+// this->handleEvent(event, e);
+// }
+// return EVENT_DONE;
+//}
+//
+//int
+//CacheContinuation::remove_and_delete(int event, Event * e)
+//{
+// NOWARN_UNUSED(event);
+// unsigned int hash = FOLDHASH(target_ip, seq_number);
+// MUTEX_TRY_LOCK(queuelock, remoteCacheContQueueMutex[hash], this_ethread());
+// if (queuelock) {
+// if (remoteCacheContQueue[hash].in(this)) {
+// remoteCacheContQueue[hash].remove(this);
+// }
+// MUTEX_RELEASE(queuelock);
+// if (use_deferred_callback)
+// callback_failure(&action, result, result_error, this);
+// else
+// cacheContAllocator_free(this);
+//
+// } else {
+// SET_HANDLER((CacheContHandler) & CacheContinuation::remove_and_delete);
+// if (!e) {
+// timeout = eventProcessor.schedule_in(this, cache_cluster_timeout, ET_CACHE_CONT_SM);
+// } else {
+// e->schedule_in(cache_cluster_timeout);
+// }
+// }
+// return EVENT_DONE;
+//}
+//
+//int
+//CacheContinuation::localVCsetupEvent(int event, ClusterVConnection * vc)
+//{
+// ink_assert(magicno == (int) MagicNo);
+// ink_assert(getMsgBuffer());
+// bool short_msg = op_is_shortform(request_opcode);
+// bool read_op = op_is_read(request_opcode);
+//
+// if (event == EVENT_INTERVAL) {
+// Event *e = (Event *) vc;
+// unsigned int hash = FOLDHASH(target_ip, seq_number);
+//
+// MUTEX_TRY_LOCK(queuelock, remoteCacheContQueueMutex[hash], e->ethread);
+// if (!queuelock) {
+// e->schedule_in(CACHE_RETRY_PERIOD);
+// return EVENT_CONT;
+// }
+//
+// if (!remoteCacheContQueue[hash].in(this)) {
+// ////////////////////////////////////////////////////
+// // Not yet queued on outstanding operations list
+// ////////////////////////////////////////////////////
+// remoteCacheContQueue[hash].enqueue(this);
+// ink_assert(timeout == e);
+// MUTEX_RELEASE(queuelock);
+// e->schedule_in(cache_cluster_timeout);
+// return EVENT_CONT;
+//
+// } else {
+// /////////////////////////////////////////////////////
+// // Timeout occurred
+// /////////////////////////////////////////////////////
+// remoteCacheContQueue[hash].remove(this);
+// MUTEX_RELEASE(queuelock);
+// Debug("cluster_timeout", "0cluster op timeout %d", seq_number);
+// CLUSTER_INCREMENT_DYN_STAT(CLUSTER_REMOTE_OP_TIMEOUTS_STAT);
+// timeout = (Event *) 1; // Note timeout
+// /////////////////////////////////////////////////////////////////
+// // Note: Failure callback is sent now, but the deallocation of
+// // the CacheContinuation is deferred until we receive the
+// // open_local() callback.
+// /////////////////////////////////////////////////////////////////
+// if (!action.cancelled)
+// action.continuation->handleEvent((read_op ? CACHE_EVENT_OPEN_READ_FAILED : CACHE_EVENT_OPEN_WRITE_FAILED), 0);
+// return EVENT_DONE;
+// }
+//
+// } else if (((event == CLUSTER_EVENT_OPEN) || (event == CLUSTER_EVENT_OPEN_EXISTS))
+// && (((ptrdiff_t) timeout & (ptrdiff_t) 1) == 0)) {
+// ink_hrtime now;
+// now = ink_get_hrtime();
+// CLUSTER_SUM_DYN_STAT(CLUSTER_OPEN_DELAY_TIME_STAT, now - start_time);
+// LOG_EVENT_TIME(start_time, open_delay_time_dist, open_delay_events);
+// if (read_op) {
+// read_cluster_vc = vc;
+// } else {
+// write_cluster_vc = vc;
+// }
+// cluster_vc_channel = vc->channel;
+// vc->current_cont = this;
+//
+// if (short_msg) {
+// CacheOpMsg_short *ms = (CacheOpMsg_short *) getMsgBuffer();
+// ms->channel = vc->channel;
+// ms->token = open_local_token;
+//
+// Debug("cache_proto",
+// "2open_local-s (%s) success, seqno=%d chan=%d token=%d,%d VC=%p",
+// (read_op ? "R" : "W"), ms->seq_number, vc->channel, ms->token.ip_created, ms->token.sequence_number, vc);
+//
+// } else {
+// CacheOpMsg_long *ml = (CacheOpMsg_long *) getMsgBuffer();
+// ml->channel = vc->channel;
+// ml->token = open_local_token;
+//
+// Debug("cache_proto",
+// "3open_local-l (%s) success, seqno=%d chan=%d token=%d,%d VC=%p",
+// (read_op ? "R" : "W"), ml->seq_number, vc->channel, ml->token.ip_created, ml->token.sequence_number, vc);
+// }
+// SET_HANDLER((CacheContHandler) & CacheContinuation::remoteOpEvent);
+//
+// if (event != CLUSTER_EVENT_OPEN_EXISTS) {
+// // Send request message
+// clusterProcessor.invoke_remote(ch,
+// (op_needs_marshalled_coi(request_opcode) ?
+// CACHE_OP_MALLOCED_CLUSTER_FUNCTION :
+// CACHE_OP_CLUSTER_FUNCTION), (char *) getMsgBuffer(), getMsgBufferLen());
+// }
+//
+// } else {
+// int send_failure_callback = 1;
+//
+// if (((ptrdiff_t) timeout & (ptrdiff_t) 1) == 0) {
+// if (short_msg) {
+// Debug("cache_proto", "2open_local-s (%s) failed, seqno=%d",
+// (read_op ? "R" : "W"), ((CacheOpMsg_short *) getMsgBuffer())->seq_number);
+// } else {
+// Debug("cache_proto", "3open_local-l (%s) failed, seqno=%d",
+// (read_op ? "R" : "W"), ((CacheOpMsg_long *) getMsgBuffer())->seq_number);
+// }
+//
+// } else {
+// Debug("cache_proto", "4open_local cancelled due to timeout, seqno=%d", seq_number);
+// this->timeout = 0;
+//
+// // Deallocate VC if successfully acquired
+//
+// if (event == CLUSTER_EVENT_OPEN) {
+// vc->pending_remote_fill = 0;
+// vc->remote_closed = 1; // avoid remote close msg
+// vc->do_io(VIO::CLOSE);
+// }
+// send_failure_callback = 0; // already sent.
+// }
+//
+// if (this->timeout)
+// this->timeout->cancel();
+// this->timeout = NULL;
+//
+// freeMsgBuffer();
+// if (send_failure_callback) {
+// //
+// // Action corresponding to "this" already sent back to user,
+// // use "this" to establish the failure callback after
+// // removing ourselves from the active list.
+// //
+// this->use_deferred_callback = true;
+// this->result = (read_op ? CACHE_EVENT_OPEN_READ_FAILED : CACHE_EVENT_OPEN_WRITE_FAILED);
+// this->result_error = 0;
+// remove_and_delete(0, (Event *) 0);
+//
+// } else {
+// cacheContAllocator_free(this);
+// }
+// return EVENT_DONE;
+// }
+// // Free message
+// freeMsgBuffer();
+//
+// return EVENT_DONE;
+//}
///////////////////////////////////////////////////////////////////////////
// cache_op_ClusterFunction()
@@ -995,14 +1176,13 @@ unmarshal_CacheOpMsg_short_2(void *data, int NeedByteSwap)
// init_from_long() support routine for cache_op_ClusterFunction()
inline void
-init_from_long(CacheContinuation * cont, CacheOpMsg_long * msg, ClusterMachine * m)
+init_from_long(CacheContinuation * cont, CacheOpMsg_long * msg)
{
- cont->no_reply_message = (msg->seq_number == CACHE_NO_RESPONSE);
+// cont->no_reply_message = (msg->seq_number == CACHE_NO_RESPONSE);
cont->seq_number = msg->seq_number;
cont->cfl_flags = msg->cfl_flags;
- cont->from = m;
cont->url_md5 = msg->url_md5;
- cont->cluster_vc_channel = msg->channel;
+// cont->cluster_vc_channel = msg->channel;
cont->frag_type = (CacheFragType) msg->frag_type;
if ((cont->request_opcode == CACHE_OPEN_WRITE_LONG)
|| (cont->request_opcode == CACHE_OPEN_READ_LONG)) {
@@ -1013,23 +1193,22 @@ init_from_long(CacheContinuation * cont, CacheOpMsg_long * msg, ClusterMachine *
cont->token = msg->token;
cont->nbytes = (((int) msg->nbytes < 0) ? 0 : msg->nbytes);
- if (cont->request_opcode == CACHE_OPEN_READ_LONG) {
- cont->caller_buf_freebytes = msg->buffer_size;
- } else {
- cont->caller_buf_freebytes = 0;
- }
+// if (cont->request_opcode == CACHE_OPEN_READ_LONG) {
+// cont->caller_buf_freebytes = msg->buffer_size;
+// } else {
+// cont->caller_buf_freebytes = 0;
+// }
}
// init_from_short() support routine for cache_op_ClusterFunction()
inline void
-init_from_short(CacheContinuation * cont, CacheOpMsg_short * msg, ClusterMachine * m)
+init_from_short(CacheContinuation * cont, CacheOpMsg_short * msg)
{
- cont->no_reply_message = (msg->seq_number == CACHE_NO_RESPONSE);
+// cont->no_reply_message = (msg->seq_number == CACHE_NO_RESPONSE);
cont->seq_number = msg->seq_number;
cont->cfl_flags = msg->cfl_flags;
- cont->from = m;
cont->url_md5 = msg->md5;
- cont->cluster_vc_channel = msg->channel;
+// cont->cluster_vc_channel = msg->channel;
cont->token = msg->token;
cont->nbytes = (((int) msg->nbytes < 0) ? 0 : msg->nbytes);
cont->frag_type = (CacheFragType) msg->frag_type;
@@ -1040,29 +1219,436 @@ init_from_short(CacheContinuation * cont, CacheOpMsg_short * msg, ClusterMachine
cont->pin_in_cache = 0;
}
- if (cont->request_opcode == CACHE_OPEN_READ) {
- cont->caller_buf_freebytes = msg->buffer_size;
- } else {
- cont->caller_buf_freebytes = 0;
- }
+// if (cont->request_opcode == CACHE_OPEN_READ) {
+// cont->caller_buf_freebytes = msg->buffer_size;
+// } else {
+// cont->caller_buf_freebytes = 0;
+// }
}
// init_from_short_2() support routine for cache_op_ClusterFunction()
inline void
-init_from_short_2(CacheContinuation * cont, CacheOpMsg_short_2 * msg, ClusterMachine * m)
+init_from_short_2(CacheContinuation * cont, CacheOpMsg_short_2 * msg)
{
- cont->no_reply_message = (msg->seq_number == CACHE_NO_RESPONSE);
+// no_reply_message and the sending machine ('from') are no longer recorded here; only the fields copied below are initialised.
cont->seq_number = msg->seq_number;
cont->cfl_flags = msg->cfl_flags;
- cont->from = m;
cont->url_md5 = msg->md5_1;
cont->frag_type = (CacheFragType) msg->frag_type;
}
+//void
+//cache_op_ClusterFunction(ClusterHandler * ch, void *data, int len)
+//{
+// EThread *thread = this_ethread();
+// ProxyMutex *mutex = thread->mutex;
+// ////////////////////////////////////////////////////////
+// // Note: we are running on the ET_CLUSTER thread
+// ////////////////////////////////////////////////////////
+// CLUSTER_INCREMENT_DYN_STAT(CLUSTER_CACHE_OUTSTANDING_STAT);
+//
+// int opcode;
+// ClusterMessageHeader *mh = (ClusterMessageHeader *) data;
+//
+// if (mh->GetMsgVersion() != CacheOpMsg_long::CACHE_OP_LONG_MESSAGE_VERSION) { ////////////////////////////////////////////////
+// // Convert from old to current message format
+// ////////////////////////////////////////////////
+// ink_release_assert(!"cache_op_ClusterFunction() bad msg version");
+// }
+// opcode = ((CacheOpMsg_long *) data)->opcode;
+//
+// // If necessary, create a continuation to reflect the response back
+//
+// CacheContinuation *c = CacheContinuation::cacheContAllocator_alloc();
+// c->mutex = new_ProxyMutex();
+// MUTEX_TRY_LOCK(lock, c->mutex, this_ethread());
+// c->request_opcode = opcode;
+// c->token.clear();
+// c->start_time = ink_get_hrtime();
+// c->ch = ch;
+// SET_CONTINUATION_HANDLER(c, (CacheContHandler)
+// & CacheContinuation::replyOpEvent);
+//
+// switch (opcode) {
+// case CACHE_OPEN_WRITE_BUFFER:
+// case CACHE_OPEN_WRITE_BUFFER_LONG:
+// ink_release_assert(!"cache_op_ClusterFunction WRITE_BUFFER not supported");
+// break;
+//
+// case CACHE_OPEN_READ_BUFFER:
+// case CACHE_OPEN_READ_BUFFER_LONG:
+// ink_release_assert(!"cache_op_ClusterFunction READ_BUFFER not supported");
+// break;
+//
+// case CACHE_OPEN_READ:
+// {
+// CacheOpMsg_short *msg = unmarshal_CacheOpMsg_short(data, mh->NeedByteSwap());
+// init_from_short(c, msg, ch->machine);
+// Debug("cache_msg",
+// "cache_op-s op=%d seqno=%d data=%p len=%d machine=%p", opcode, c->seq_number, data, len, ch->machine);
+// //
+// // Establish the remote side of the ClusterVConnection
+// //
+// c->write_cluster_vc = clusterProcessor.connect_local((Continuation *) 0,
+// &c->token,
+// c->cluster_vc_channel,
+// (CLUSTER_OPT_IMMEDIATE | CLUSTER_OPT_CONN_READ));
+// if (!c->write_cluster_vc) {
+// // Unable to setup channel, abort processing.
+// CLUSTER_INCREMENT_DYN_STAT(CLUSTER_CHAN_INUSE_STAT);
+// Debug("chan_inuse",
+// "1Remote chan=%d inuse tok.ip=%u.%u.%u.%u tok.seqno=%d seqno=%d",
+// c->cluster_vc_channel, DOT_SEPARATED(c->token.ip_created), c->token.sequence_number, c->seq_number);
+//
+// // Send cluster op failed reply
+// c->replyOpEvent(CACHE_EVENT_OPEN_READ_FAILED, (VConnection *) - ECLUSTER_CHANNEL_INUSE);
+// break;
+//
+// } else {
+// c->write_cluster_vc->current_cont = c;
+// }
+// ink_release_assert(c->write_cluster_vc != CLUSTER_DELAYED_OPEN);
+// ink_release_assert((opcode == CACHE_OPEN_READ)
+// || c->write_cluster_vc->pending_remote_fill);
+//
+// SET_CONTINUATION_HANDLER(c, (CacheContHandler)
+// & CacheContinuation::setupVCdataRead);
+// Debug("cache_proto",
+// "0read op, seqno=%d chan=%d bufsize=%d token=%d,%d",
+// msg->seq_number, msg->channel, msg->buffer_size, msg->token.ip_created, msg->token.sequence_number);
+//#ifdef CACHE_MSG_TRACE
+// log_cache_op_msg(msg->seq_number, len, "cache_op_open_read");
+//#endif
+// CacheKey key(msg->md5);
+//
+// char *hostname = NULL;
+// int host_len = len - op_to_sizeof_fixedlen_msg(opcode);
+// if (host_len) {
+// hostname = (char *) msg->moi;
+// }
+// Cache *call_cache = caches[c->frag_type];
+// c->cache_action = call_cache->open_read(c, &key, c->frag_type, hostname, host_len);
+// break;
+// }
+// case CACHE_OPEN_READ_LONG:
+// {
+// // Cache needs message data, copy it.
+// c->setMsgBufferLen(len);
+// c->allocMsgBuffer();
+// memcpy(c->getMsgBuffer(), (char *) data, len);
+//
+// int flen = CacheOpMsg_long::sizeof_fixedlen_msg();
+// CacheOpMsg_long *msg = unmarshal_CacheOpMsg_long(c->getMsgBuffer(), mh->NeedByteSwap());
+// init_from_long(c, msg, ch->machine);
+// Debug("cache_msg",
+// "cache_op-l op=%d seqno=%d data=%p len=%d machine=%p", opcode, c->seq_number, data, len, ch->machine);
+//#ifdef CACHE_MSG_TRACE
+// log_cache_op_msg(msg->seq_number, len, "cache_op_open_read_long");
+//#endif
+// //
+// // Establish the remote side of the ClusterVConnection
+// //
+// c->write_cluster_vc = clusterProcessor.connect_local((Continuation *) 0,
+// &c->token,
+// c->cluster_vc_channel,
+// (CLUSTER_OPT_IMMEDIATE | CLUSTER_OPT_CONN_READ));
+// if (!c->write_cluster_vc) {
+// // Unable to setup channel, abort processing.
+// CLUSTER_INCREMENT_DYN_STAT(CLUSTER_CHAN_INUSE_STAT);
+// Debug("chan_inuse",
+// "2Remote chan=%d inuse tok.ip=%u.%u.%u.%u tok.seqno=%d seqno=%d",
+// c->cluster_vc_channel, DOT_SEPARATED(c->token.ip_created), c->token.sequence_number, c->seq_number);
+//
+// // Send cluster op failed reply
+// c->replyOpEvent(CACHE_EVENT_OPEN_READ_FAILED, (VConnection *) - ECLUSTER_CHANNEL_INUSE);
+// break;
+//
+// } else {
+// c->write_cluster_vc->current_cont = c;
+// }
+// ink_release_assert(c->write_cluster_vc != CLUSTER_DELAYED_OPEN);
+// ink_release_assert((opcode == CACHE_OPEN_READ_LONG)
+// || c->write_cluster_vc->pending_remote_fill);
+//
+// SET_CONTINUATION_HANDLER(c, (CacheContHandler)
+// & CacheContinuation::setupReadWriteVC);
+// Debug("cache_proto",
+// "1read op, seqno=%d chan=%d bufsize=%d token=%d,%d",
+// msg->seq_number, msg->channel, msg->buffer_size, msg->token.ip_created, msg->token.sequence_number);
+//
+// const char *p = (const char *) msg + flen;
+// int moi_len = len - flen;
+// int res;
+//
+// ink_assert(moi_len > 0);
+//
+// // Unmarshal CacheHTTPHdr
+// res = c->ic_request.unmarshal((char *) p, moi_len, NULL);
+// ink_assert(res > 0);
+// ink_assert(c->ic_request.valid());
+// c->request_purge = c->ic_request.method_get_wksidx() == HTTP_WKSIDX_PURGE || c->ic_request.method_get_wksidx() == HTTP_WKSIDX_DELETE;
+// moi_len -= res;
+// p += res;
+// ink_assert(moi_len > 0);
+// // Unmarshal CacheLookupHttpConfig
+// c->ic_params = new(CacheLookupHttpConfigAllocator.alloc())
+// CacheLookupHttpConfig();
+// res = c->ic_params->unmarshal(&c->ic_arena, (const char *) p, moi_len);
+// ink_assert(res > 0);
+//
+// moi_len -= res;
+// p += res;
+//
+// CacheKey key(msg->url_md5);
+//
+// char *hostname = NULL;
+// int host_len = 0;
+//
+// if (moi_len) {
+// hostname = (char *) p;
+// host_len = moi_len;
+//
+// // Save hostname and attach it to the continuation since we may
+// // need it if we convert this to an open_write.
+//
+// c->ic_hostname = new_IOBufferData(iobuffer_size_to_index(host_len));
+// c->ic_hostname_len = host_len;
+//
+// memcpy(c->ic_hostname->data(), hostname, host_len);
+// }
+//
+// Cache *call_cache = caches[c->frag_type];
+// Action *a = call_cache->open_read(c, &key, &c->ic_request,
+// c->ic_params,
+// c->frag_type, hostname, host_len);
+// // Get rid of purify warnings since 'c' can be freed by open_read.
+// if (a != ACTION_RESULT_DONE) {
+// c->cache_action = a;
+// }
+// break;
+// }
+// case CACHE_OPEN_WRITE:
+// {
+// CacheOpMsg_short *msg = unmarshal_CacheOpMsg_short(data, mh->NeedByteSwap());
+// init_from_short(c, msg, ch->machine);
+// Debug("cache_msg",
+// "cache_op-s op=%d seqno=%d data=%p len=%d machine=%p", opcode, c->seq_number, data, len, ch->machine);
+//#ifdef CACHE_MSG_TRACE
+// log_cache_op_msg(msg->seq_number, len, "cache_op_open_write");
+//#endif
+// //
+// // Establish the remote side of the ClusterVConnection
+// //
+// c->read_cluster_vc = clusterProcessor.connect_local((Continuation *) 0,
+// &c->token,
+// c->cluster_vc_channel,
+// (CLUSTER_OPT_IMMEDIATE | CLUSTER_OPT_CONN_WRITE));
+// if (!c->read_cluster_vc) {
+// // Unable to setup channel, abort processing.
+// CLUSTER_INCREMENT_DYN_STAT(CLUSTER_CHAN_INUSE_STAT);
+// Debug("chan_inuse",
+// "3Remote chan=%d inuse tok.ip=%u.%u.%u.%u tok.seqno=%d seqno=%d",
+// c->cluster_vc_channel, DOT_SEPARATED(c->token.ip_created), c->token.sequence_number, c->seq_number);
+//
+// // Send cluster op failed reply
+// c->replyOpEvent(CACHE_EVENT_OPEN_WRITE_FAILED, (VConnection *) - ECLUSTER_CHANNEL_INUSE);
+// break;
+//
+// } else {
+// c->read_cluster_vc->current_cont = c;
+// }
+// ink_release_assert(c->read_cluster_vc != CLUSTER_DELAYED_OPEN);
+//
+// CacheKey key(msg->md5);
+//
+// char *hostname = NULL;
+// int host_len = len - op_to_sizeof_fixedlen_msg(opcode);
+// if (host_len) {
+// hostname = (char *) msg->moi;
+// }
+//
+// Cache *call_cache = caches[c->frag_type];
+// Action *a = call_cache->open_write(c, &key, c->frag_type,
+// !!(c->cfl_flags & CFL_OVERWRITE_ON_WRITE),
+// c->pin_in_cache, hostname, host_len);
+// if (a != ACTION_RESULT_DONE) {
+// c->cache_action = a;
+// }
+// break;
+// }
+// case CACHE_OPEN_WRITE_LONG:
+// {
+// // Cache needs message data, copy it.
+// c->setMsgBufferLen(len);
+// c->allocMsgBuffer();
+// memcpy(c->getMsgBuffer(), (char *) data, len);
+//
+// int flen = CacheOpMsg_long::sizeof_fixedlen_msg();
+// CacheOpMsg_long *msg = unmarshal_CacheOpMsg_long(c->getMsgBuffer(), mh->NeedByteSwap());
+// init_from_long(c, msg, ch->machine);
+// Debug("cache_msg",
+// "cache_op-l op=%d seqno=%d data=%p len=%d machine=%p", opcode, c->seq_number, data, len, ch->machine);
+//#ifdef CACHE_MSG_TRACE
+// log_cache_op_msg(msg->seq_number, len, "cache_op_open_write_long");
+//#endif
+// //
+// // Establish the remote side of the ClusterVConnection
+// //
+// c->read_cluster_vc = clusterProcessor.connect_local((Continuation *) 0,
+// &c->token,
+// c->cluster_vc_channel,
+// (CLUSTER_OPT_IMMEDIATE | CLUSTER_OPT_CONN_WRITE));
+// if (!c->read_cluster_vc) {
+// // Unable to setup channel, abort processing.
+// CLUSTER_INCREMENT_DYN_STAT(CLUSTER_CHAN_INUSE_STAT);
+// Debug("chan_inuse",
+// "4Remote chan=%d inuse tok.ip=%u.%u.%u.%u tok.seqno=%d seqno=%d",
+// c->cluster_vc_channel, DOT_SEPARATED(c->token.ip_created), c->token.sequence_number, c->seq_number);
+//
+// // Send cluster op failed reply
+// c->replyOpEvent(CACHE_EVENT_OPEN_WRITE_FAILED, (VConnection *) - ECLUSTER_CHANNEL_INUSE);
+// break;
+//
+// } else {
+// c->read_cluster_vc->current_cont = c;
+// }
+// ink_release_assert(c->read_cluster_vc != CLUSTER_DELAYED_OPEN);
+//
+// CacheHTTPInfo *ci = 0;
+// const char *p;
+// int res = 0;
+// int moi_len = len - flen;
+//
+// if (moi_len && c->cfl_flags & CFL_LOPENWRITE_HAVE_OLDINFO) {
+// p = (const char *) msg + flen;
+//
+// // Unmarshal old CacheHTTPInfo
+// res = HTTPInfo::unmarshal((char *) p, moi_len, NULL);
+// ink_assert(res > 0);
+// c->ic_old_info.get_handle((char *) p, moi_len);
+// ink_assert(c->ic_old_info.valid());
+// ci = &c->ic_old_info;
+// } else {
+// p = (const char *) 0;
+// }
+// if (c->cfl_flags & CFL_ALLOW_MULTIPLE_WRITES) {
+// ink_assert(!ci);
+// ci = (CacheHTTPInfo *) CACHE_ALLOW_MULTIPLE_WRITES;
+// }
+// moi_len -= res;
+// p += res;
+//
+// CacheKey key(msg->url_md5);
+// char *hostname = NULL;
+//
+// if (moi_len) {
+// hostname = (char *) p;
+// }
+//
+// Cache *call_cache = caches[c->frag_type];
+// Action *a = call_cache->open_write(c, &key, ci, c->pin_in_cache,
+// NULL, c->frag_type, hostname, len);
+// if (a != ACTION_RESULT_DONE) {
+// c->cache_action = a;
+// }
+// break;
+// }
+// case CACHE_REMOVE:
+// {
+// CacheOpMsg_short *msg = unmarshal_CacheOpMsg_short(data, mh->NeedByteSwap());
+// init_from_short(c, msg, ch->machine);
+// Debug("cache_msg",
+// "cache_op op=%d seqno=%d data=%p len=%d machine=%p", opcode, c->seq_number, data, len, ch->machine);
+//#ifdef CACHE_MSG_TRACE
+// log_cache_op_msg(msg->seq_number, len, "cache_op_remove");
+//#endif
+// CacheKey key(msg->md5);
+//
+// char *hostname = NULL;
+// int host_len = len - op_to_sizeof_fixedlen_msg(opcode);
+// if (host_len) {
+// hostname = (char *) msg->moi;
+// }
+//
+// Cache *call_cache = caches[c->frag_type];
+// Action *a = call_cache->remove(c, &key, c->frag_type,
+// !!(c->cfl_flags & CFL_REMOVE_USER_AGENTS),
+// !!(c->cfl_flags & CFL_REMOVE_LINK),
+// hostname, host_len);
+// if (a != ACTION_RESULT_DONE) {
+// c->cache_action = a;
+// }
+// break;
+// }
+// case CACHE_LINK:
+// {
+// CacheOpMsg_short_2 *msg = unmarshal_CacheOpMsg_short_2(data, mh->NeedByteSwap());
+// init_from_short_2(c, msg, ch->machine);
+// Debug("cache_msg",
+// "cache_op op=%d seqno=%d data=%p len=%d machine=%p", opcode, c->seq_number, data, len, ch->machine);
+//#ifdef CACHE_MSG_TRACE
+// log_cache_op_msg(msg->seq_number, len, "cache_op_link");
+//#endif
+//
+// CacheKey key1(msg->md5_1);
+// CacheKey key2(msg->md5_2);
+//
+// char *hostname = NULL;
+// int host_len = len - op_to_sizeof_fixedlen_msg(opcode);
+// if (host_len) {
+// hostname = (char *) msg->moi;
+// }
+//
+// Cache *call_cache = caches[c->frag_type];
+// Action *a = call_cache->link(c, &key1, &key2, c->frag_type,
+// hostname, host_len);
+// if (a != ACTION_RESULT_DONE) {
+// c->cache_action = a;
+// }
+// break;
+// }
+// case CACHE_DEREF:
+// {
+// CacheOpMsg_short *msg = unmarshal_CacheOpMsg_short(data, mh->NeedByteSwap());
+// init_from_short(c, msg, ch->machine);
+// Debug("cache_msg",
+// "cache_op op=%d seqno=%d data=%p len=%d machine=%p", opcode, c->seq_number, data, len, ch->machine);
+//#ifdef CACHE_MSG_TRACE
+// log_cache_op_msg(msg->seq_number, len, "cache_op_deref");
+//#endif
+//
+// CacheKey key(msg->md5);
+//
+// char *hostname = NULL;
+// int host_len = len - op_to_sizeof_fixedlen_msg(opcode);
+// if (host_len) {
+// hostname = (char *) msg->moi;
+// }
+//
+// Cache *call_cache = caches[c->frag_type];
+// Action *a = call_cache->deref(c, &key, c->frag_type,
+// hostname, host_len);
+// if (a != ACTION_RESULT_DONE) {
+// c->cache_action = a;
+// }
+// break;
+// }
+//
+// default:
+// {
+// ink_release_assert(0);
+// }
+// } // End of switch
+//}
+
+
void
-cache_op_ClusterFunction(ClusterHandler * ch, void *data, int len)
+cache_op_ClusterFunction(ClusterSession cs, void *context, void *d)
{
- EThread *thread = this_ethread();
+ ClusterCont *cc = (ClusterCont *) d;
+ ink_assert(cc && !context);
+
+ EThread *thread = cc->mutex->thread_holding;
ProxyMutex *mutex = thread->mutex;
////////////////////////////////////////////////////////
// Note: we are running on the ET_CLUSTER thread
@@ -1070,26 +1656,37 @@ cache_op_ClusterFunction(ClusterHandler * ch, void *data, int len)
CLUSTER_INCREMENT_DYN_STAT(CLUSTER_CACHE_OUTSTANDING_STAT);
int opcode;
- ClusterMessageHeader *mh = (ClusterMessageHeader *) data;
+ int len = cc->data_len;
+
+ // memcpy to release the reference early
+ Ptr<IOBufferData> buf;
+ buf = new_IOBufferData(iobuffer_size_to_index(len, MAX_BUFFER_SIZE_INDEX));
+ char *data = buf.m_ptr->data();
+ for (IOBufferBlock *b = cc->data; b; b = b->next) {
+ memcpy(data, b->_start, b->_end - b->_start);
+ data += b->_end - b->_start;
+ }
+ data = buf->data();
- if (mh->GetMsgVersion() != CacheOpMsg_long::CACHE_OP_LONG_MESSAGE_VERSION) { ////////////////////////////////////////////////
- // Convert from old to current message format
- ////////////////////////////////////////////////
- ink_release_assert(!"cache_op_ClusterFunction() bad msg version");
+ ClusterMessageHeader *mh = (ClusterMessageHeader *) data;
+ ink_assert(mh->GetMsgVersion() == CacheOpMsg_long::CACHE_OP_LONG_MESSAGE_VERSION);
+
+ opcode = ((CacheOpMsg_long *) mh)->opcode;
+ CacheContinuation *c = new_CacheCont(thread);
+ if (cluster_bind_session(cs, c)) {
+ cluster_close_session(cs);
+ free_CacheCont(c);
+ return;
}
- opcode = ((CacheOpMsg_long *) data)->opcode;
-
- // If necessary, create a continuation to reflect the response back
- CacheContinuation *c = CacheContinuation::cacheContAllocator_alloc();
- c->mutex = new_ProxyMutex();
- MUTEX_TRY_LOCK(lock, c->mutex, this_ethread());
c->request_opcode = opcode;
+ c->frag_type = (CacheFragType) ((CacheOpMsg_long *) mh)->frag_type;
c->token.clear();
- c->start_time = ink_get_hrtime();
- c->ch = ch;
- SET_CONTINUATION_HANDLER(c, (CacheContHandler)
- & CacheContinuation::replyOpEvent);
+ c->rw_buf_msg = buf;
+ c->rw_buf_msg_len = len;
+ c->cs = cs;
+
+ MUTEX_TRY_LOCK(lock, c->mutex, c->thread);
switch (opcode) {
case CACHE_OPEN_WRITE_BUFFER:
@@ -1105,33 +1702,9 @@ cache_op_ClusterFunction(ClusterHandler * ch, void *data, int len)
case CACHE_OPEN_READ:
{
CacheOpMsg_short *msg = unmarshal_CacheOpMsg_short(data, mh->NeedByteSwap());
- init_from_short(c, msg, ch->machine);
+ init_from_short(c, msg);
Debug("cache_msg",
- "cache_op-s op=%d seqno=%d data=%p len=%d machine=%p", opcode, c->seq_number, data, len, ch->machine);
- //
- // Establish the remote side of the ClusterVConnection
- //
- c->write_cluster_vc = clusterProcessor.connect_local((Continuation *) 0,
- &c->token,
- c->cluster_vc_channel,
- (CLUSTER_OPT_IMMEDIATE | CLUSTER_OPT_CONN_READ));
- if (!c->write_cluster_vc) {
- // Unable to setup channel, abort processing.
- CLUSTER_INCREMENT_DYN_STAT(CLUSTER_CHAN_INUSE_STAT);
- Debug("chan_inuse",
- "1Remote chan=%d inuse tok.ip=%u.%u.%u.%u tok.seqno=%d seqno=%d",
- c->cluster_vc_channel, DOT_SEPARATED(c->token.ip_created), c->token.sequence_number, c->seq_number);
-
- // Send cluster op failed reply
- c->replyOpEvent(CACHE_EVENT_OPEN_READ_FAILED, (VConnection *) - ECLUSTER_CHANNEL_INUSE);
- break;
-
- } else {
- c->write_cluster_vc->current_cont = c;
- }
- ink_release_assert(c->write_cluster_vc != CLUSTER_DELAYED_OPEN);
- ink_release_assert((opcode == CACHE_OPEN_READ)
- || c->write_cluster_vc->pending_remote_fill);
+ "cache_op-s op=%d seqno=%d data=%p len=%d", opcode, c->seq_number, data, len);
SET_CONTINUATION_HANDLER(c, (CacheContHandler)
& CacheContinuation::setupVCdataRead);
@@ -1143,57 +1716,31 @@ cache_op_ClusterFunction(ClusterHandler * ch, void *data, int len)
#endif
CacheKey key(msg->md5);
- char *hostname = NULL;
- int host_len = len - op_to_sizeof_fixedlen_msg(opcode);
- if (host_len) {
- hostname = (char *) msg->moi.byte;
- }
+ int flen = op_to_sizeof_fixedlen_msg(opcode);
+ c->ic_hostname_len = len - flen;
+ c->ic_hostname = (char *) msg + flen;
Cache *call_cache = caches[c->frag_type];
- c->cache_action = call_cache->open_read(c, &key, c->frag_type, hostname, host_len);
+ c->pending_action = call_cache->open_read(c, &key, c->frag_type, c->ic_hostname, c->ic_hostname_len);
break;
}
case CACHE_OPEN_READ_LONG:
{
// Cache needs message data, copy it.
- c->setMsgBufferLen(len);
- c->allocMsgBuffer();
- memcpy(c->getMsgBuffer(), (char *) data, len);
+// c->setMsgBufferLen(len);
+// c->allocMsgBuffer();
+// memcpy(c->getMsgBuffer(), (char *) data, len);
int flen = CacheOpMsg_long::sizeof_fixedlen_msg();
- CacheOpMsg_long *msg = unmarshal_CacheOpMsg_long(c->getMsgBuffer(), mh->NeedByteSwap());
- init_from_long(c, msg, ch->machine);
+ CacheOpMsg_long *msg = unmarshal_CacheOpMsg_long(data, mh->NeedByteSwap());
+ init_from_long(c, msg);
Debug("cache_msg",
- "cache_op-l op=%d seqno=%d data=%p len=%d machine=%p", opcode, c->seq_number, data, len, ch->machine);
+ "cache_op-l op=%d seqno=%d data=%p len=%d", opcode, c->seq_number, data, len);
#ifdef CACHE_MSG_TRACE
log_cache_op_msg(msg->seq_number, len, "cache_op_open_read_long");
#endif
- //
- // Establish the remote side of the ClusterVConnection
- //
- c->write_cluster_vc = clusterProcessor.connect_local((Continuation *) 0,
- &c->token,
- c->cluster_vc_channel,
- (CLUSTER_OPT_IMMEDIATE | CLUSTER_OPT_CONN_READ));
- if (!c->write_cluster_vc) {
- // Unable to setup channel, abort processing.
- CLUSTER_INCREMENT_DYN_STAT(CLUSTER_CHAN_INUSE_STAT);
- Debug("chan_inuse",
- "2Remote chan=%d inuse tok.ip=%u.%u.%u.%u tok.seqno=%d seqno=%d",
- c->cluster_vc_channel, DOT_SEPARATED(c->token.ip_created), c->token.sequence_number, c->seq_number);
-
- // Send cluster op failed reply
- c->replyOpEvent(CACHE_EVENT_OPEN_READ_FAILED, (VConnection *) - ECLUSTER_CHANNEL_INUSE);
- break;
-
- } else {
- c->write_cluster_vc->current_cont = c;
- }
- ink_release_assert(c->write_cluster_vc != CLUSTER_DELAYED_OPEN);
- ink_release_assert((opcode == CACHE_OPEN_READ_LONG)
- || c->write_cluster_vc->pending_remote_fill);
SET_CONTINUATION_HANDLER(c, (CacheContHandler)
- & CacheContinuation::setupReadWriteVC);
+ & CacheContinuation::setupVCdataRead);
Debug("cache_proto",
"1read op, seqno=%d chan=%d bufsize=%d token=%d,%d",
msg->seq_number, msg->channel, msg->buffer_size, msg->token.ip_created, msg->token.sequence_number);
@@ -1215,6 +1762,11 @@ cache_op_ClusterFunction(ClusterHandler * ch, void *data, int len)
// Unmarshal CacheLookupHttpConfig
c->ic_params = new(CacheLookupHttpConfigAllocator.alloc())
CacheLookupHttpConfig();
+ memcpy(c->ic_params, p, sizeof(CacheLookupHttpConfig));
+ moi_len -= sizeof(CacheLookupHttpConfig);
+ p += sizeof(CacheLookupHttpConfig);
+
+ ink_assert(moi_len > 0);
res = c->ic_params->unmarshal(&c->ic_arena, (const char *) p, moi_len);
ink_assert(res > 0);
@@ -1223,132 +1775,81 @@ cache_op_ClusterFunction(ClusterHandler * ch, void *data, int len)
CacheKey key(msg->url_md5);
- char *hostname = NULL;
- int host_len = 0;
-
if (moi_len) {
- hostname = (char *) p;
- host_len = moi_len;
-
- // Save hostname and attach it to the continuation since we may
- // need it if we convert this to an open_write.
-
- c->ic_hostname = new_IOBufferData(iobuffer_size_to_index(host_len));
- c->ic_hostname_len = host_len;
-
- memcpy(c->ic_hostname->data(), hostname, host_len);
+ c->ic_hostname = (char *) p;
+ c->ic_hostname_len = moi_len;
}
Cache *call_cache = caches[c->frag_type];
Action *a = call_cache->open_read(c, &key, &c->ic_request,
c->ic_params,
- c->frag_type, hostname, host_len);
+ c->frag_type, c->ic_hostname, c->ic_hostname_len);
// Get rid of purify warnings since 'c' can be freed by open_read.
if (a != ACTION_RESULT_DONE) {
- c->cache_action = a;
+ c->pending_action = a;
}
break;
}
case CACHE_OPEN_WRITE:
{
CacheOpMsg_short *msg = unmarshal_CacheOpMsg_short(data, mh->NeedByteSwap());
- init_from_short(c, msg, ch->machine);
+ init_from_short(c, msg);
Debug("cache_msg",
- "cache_op-s op=%d seqno=%d data=%p len=%d machine=%p", opcode, c->seq_number, data, len, ch->machine);
+ "cache_op-s op=%d seqno=%d data=%p len=%d", opcode, c->seq_number, data, len);
#ifdef CACHE_MSG_TRACE
log_cache_op_msg(msg->seq_number, len, "cache_op_open_write");
#endif
- //
- // Establish the remote side of the ClusterVConnection
- //
- c->read_cluster_vc = clusterProcessor.connect_local((Continuation *) 0,
- &c->token,
- c->cluster_vc_channel,
- (CLUSTER_OPT_IMMEDIATE | CLUSTER_OPT_CONN_WRITE));
- if (!c->read_cluster_vc) {
- // Unable to setup channel, abort processing.
- CLUSTER_INCREMENT_DYN_STAT(CLUSTER_CHAN_INUSE_STAT);
- Debug("chan_inuse",
- "3Remote chan=%d inuse tok.ip=%u.%u.%u.%u tok.seqno=%d seqno=%d",
- c->cluster_vc_channel, DOT_SEPARATED(c->token.ip_created), c->token.sequence_number, c->seq_number);
-
- // Send cluster op failed reply
- c->replyOpEvent(CACHE_EVENT_OPEN_WRITE_FAILED, (VConnection *) - ECLUSTER_CHANNEL_INUSE);
- break;
-
- } else {
- c->read_cluster_vc->current_cont = c;
- }
- ink_release_assert(c->read_cluster_vc != CLUSTER_DELAYED_OPEN);
CacheKey key(msg->md5);
- char *hostname = NULL;
- int host_len = len - op_to_sizeof_fixedlen_msg(opcode);
- if (host_len) {
- hostname = (char *) msg->moi.byte;
+ int flen = op_to_sizeof_fixedlen_msg(opcode);
+ c->ic_hostname_len = len - flen;
+ if (c->ic_hostname_len) {
+ c->ic_hostname = (char *) msg + flen;
}
+ SET_CONTINUATION_HANDLER(c, (CacheContHandler)
+ & CacheContinuation::setupVCdataWrite);
Cache *call_cache = caches[c->frag_type];
Action *a = call_cache->open_write(c, &key, c->frag_type,
!!(c->cfl_flags & CFL_OVERWRITE_ON_WRITE),
- c->pin_in_cache, hostname, host_len);
+ c->pin_in_cache, c->ic_hostname, c->ic_hostname_len);
if (a != ACTION_RESULT_DONE) {
- c->cache_action = a;
+ c->pending_action = a;
}
break;
}
case CACHE_OPEN_WRITE_LONG:
{
// Cache needs message data, copy it.
- c->setMsgBufferLen(len);
- c->allocMsgBuffer();
- memcpy(c->getMsgBuffer(), (char *) data, len);
+// The whole message was already copied into 'buf' (kept alive through
+// c->rw_buf_msg) at the top of this function, so no per-continuation message
+// buffer is allocated here; unmarshal from 'data' as CACHE_OPEN_READ_LONG does.
int flen = CacheOpMsg_long::sizeof_fixedlen_msg();
- CacheOpMsg_long *msg = unmarshal_CacheOpMsg_long(c->getMsgBuffer(), mh->NeedByteSwap());
+ CacheOpMsg_long *msg = unmarshal_CacheOpMsg_long(data, mh->NeedByteSwap());
- init_from_long(c, msg, ch->machine);
+ init_from_long(c, msg);
Debug("cache_msg",
- "cache_op-l op=%d seqno=%d data=%p len=%d machine=%p", opcode, c->seq_number, data, len, ch->machine);
+ "cache_op-l op=%d seqno=%d data=%p len=%d", opcode, c->seq_number, data, len);
#ifdef CACHE_MSG_TRACE
log_cache_op_msg(msg->seq_number, len, "cache_op_open_write_long");
#endif
- //
- // Establish the remote side of the ClusterVConnection
- //
- c->read_cluster_vc = clusterProcessor.connect_local((Continuation *) 0,
- &c->token,
- c->cluster_vc_channel,
- (CLUSTER_OPT_IMMEDIATE | CLUSTER_OPT_CONN_WRITE));
- if (!c->read_cluster_vc) {
- // Unable to setup channel, abort processing.
- CLUSTER_INCREMENT_DYN_STAT(CLUSTER_CHAN_INUSE_STAT);
- Debug("chan_inuse",
- "4Remote chan=%d inuse tok.ip=%u.%u.%u.%u tok.seqno=%d seqno=%d",
- c->cluster_vc_channel, DOT_SEPARATED(c->token.ip_created), c->token.sequence_number, c->seq_number);
-
- // Send cluster op failed reply
- c->replyOpEvent(CACHE_EVENT_OPEN_WRITE_FAILED, (VConnection *) - ECLUSTER_CHANNEL_INUSE);
- break;
-
- } else {
- c->read_cluster_vc->current_cont = c;
- }
- ink_release_assert(c->read_cluster_vc != CLUSTER_DELAYED_OPEN);
CacheHTTPInfo *ci = 0;
- const char *p = (const char *) msg + flen;
+ const char *p;
int res = 0;
int moi_len = len - flen;
- if (moi_len && c->cfl_flags & CFL_LOPENWRITE_HAVE_OLDINFO) {
-
+ if (moi_len && (c->cfl_flags & CFL_LOPENWRITE_HAVE_OLDINFO)) {
+ p = (const char *) msg + flen;
// Unmarshal old CacheHTTPInfo
res = HTTPInfo::unmarshal((char *) p, moi_len, NULL);
ink_assert(res > 0);
c->ic_old_info.get_handle((char *) p, moi_len);
ink_assert(c->ic_old_info.valid());
ci = &c->ic_old_info;
+ } else {
+ p = (const char *) 0;
}
if (c->cfl_flags & CFL_ALLOW_MULTIPLE_WRITES) {
ink_assert(!ci);
@@ -1358,53 +1859,60 @@ cache_op_ClusterFunction(ClusterHandler * ch, void *data, int len)
p += res;
CacheKey key(msg->url_md5);
- char *hostname = NULL;
if (moi_len) {
- hostname = (char *) p;
+ c->ic_hostname = (char *) p;
+ c->ic_hostname_len = moi_len;
}
+ SET_CONTINUATION_HANDLER(c, (CacheContHandler)
+ & CacheContinuation::setupVCdataWrite);
+
Cache *call_cache = caches[c->frag_type];
Action *a = call_cache->open_write(c, &key, ci, c->pin_in_cache,
- NULL, c->frag_type, hostname, moi_len);
+ NULL, c->frag_type, c->ic_hostname, c->ic_hostname_len);
if (a != ACTION_RESULT_DONE) {
- c->cache_action = a;
+ c->pending_action = a;
}
break;
}
case CACHE_REMOVE:
{
CacheOpMsg_short *msg = unmarshal_CacheOpMsg_short(data, mh->NeedByteSwap());
- init_from_short(c, msg, ch->machine);
+ init_from_short(c, msg);
Debug("cache_msg",
- "cache_op op=%d seqno=%d data=%p len=%d machine=%p", opcode, c->seq_number, data, len, ch->machine);
+ "cache_op op=%d seqno=%d data=%p len=%d", opcode, c->seq_number, data, len);
#ifdef CACHE_MSG_TRACE
log_cache_op_msg(msg->seq_number, len, "cache_op_remove");
#endif
CacheKey key(msg->md5);
- char *hostname = NULL;
- int host_len = len - op_to_sizeof_fixedlen_msg(opcode);
+ int flen = op_to_sizeof_fixedlen_msg(opcode);
+ int host_len = len - flen;
if (host_len) {
- hostname = (char *) msg->moi.byte;
+ c->ic_hostname = (char *) msg + flen;
+ c->ic_hostname_len = host_len;
}
+ SET_CONTINUATION_HANDLER(c, (CacheContHandler)
+ & CacheContinuation::setupVCdataRemove);
+
Cache *call_cache = caches[c->frag_type];
Action *a = call_cache->remove(c, &key, c->frag_type,
!!(c->cfl_flags & CFL_REMOVE_USER_AGENTS),
!!(c->cfl_flags & CFL_REMOVE_LINK),
- hostname, host_len);
+ c->ic_hostname, c->ic_hostname_len);
if (a != ACTION_RESULT_DONE) {
- c->cache_action = a;
+ c->pending_action = a;
}
break;
}
case CACHE_LINK:
{
CacheOpMsg_short_2 *msg = unmarshal_CacheOpMsg_short_2(data, mh->NeedByteSwap());
- init_from_short_2(c, msg, ch->machine);
+ init_from_short_2(c, msg);
Debug("cache_msg",
- "cache_op op=%d seqno=%d data=%p len=%d machine=%p", opcode, c->seq_number, data, len, ch->machine);
+ "cache_op op=%d seqno=%d data=%p len=%d", opcode, c->seq_number, data, len);
#ifdef CACHE_MSG_TRACE
log_cache_op_msg(msg->seq_number, len, "cache_op_link");
#endif
@@ -1412,364 +1920,671 @@ cache_op_ClusterFunction(ClusterHandler * ch, void *data, int len)
CacheKey key1(msg->md5_1);
CacheKey key2(msg->md5_2);
- char *hostname = NULL;
- int host_len = len - op_to_sizeof_fixedlen_msg(opcode);
+ int flen = op_to_sizeof_fixedlen_msg(opcode);
+ int host_len = len - flen;
if (host_len) {
- hostname = (char *) msg->moi.byte;
+ c->ic_hostname = (char *) msg + flen;
+ c->ic_hostname_len = host_len;
}
+ SET_CONTINUATION_HANDLER(c, (CacheContHandler)
+ & CacheContinuation::setupVCdataLink);
+
Cache *call_cache = caches[c->frag_type];
Action *a = call_cache->link(c, &key1, &key2, c->frag_type,
- hostname, host_len);
+ c->ic_hostname, c->ic_hostname_len);
if (a != ACTION_RESULT_DONE) {
- c->cache_action = a;
+ c->pending_action = a;
}
break;
}
case CACHE_DEREF:
{
CacheOpMsg_short *msg = unmarshal_CacheOpMsg_short(data, mh->NeedByteSwap());
- init_from_short(c, msg, ch->machine);
+ init_from_short(c, msg);
Debug("cache_msg",
- "cache_op op=%d seqno=%d data=%p len=%d machine=%p", opcode, c->seq_number, data, len, ch->machine);
+ "cache_op op=%d seqno=%d data=%p len=%d", opcode, c->seq_number, data, len);
#ifdef CACHE_MSG_TRACE
log_cache_op_msg(msg->seq_number, len, "cache_op_deref");
#endif
CacheKey key(msg->md5);
- char *hostname = NULL;
- int host_len = len - op_to_sizeof_fixedlen_msg(opcode);
+ int flen = op_to_sizeof_fixedlen_msg(opcode);
+ int host_len = len - flen;
if (host_len) {
- hostname = (char *) msg->moi.byte;
+ c->ic_hostname = (char *) msg + flen;
+ c->ic_hostname_len = host_len;
}
+ SET_CONTINUATION_HANDLER(c, (CacheContHandler)
+ & CacheContinuation::setupVCdataDeref);
+
Cache *call_cache = caches[c->frag_type];
Action *a = call_cache->deref(c, &key, c->frag_type,
- hostname, host_len);
+ c->ic_hostname, c->ic_hostname_len);
if (a != ACTION_RESULT_DONE) {
- c->cache_action = a;
+ c->pending_action = a;
}
break;
}
default:
{
- ink_release_assert(0);
+ ink_assert(0);
+ break;
}
} // End of switch
}
-
void
cache_op_malloc_ClusterFunction(ClusterHandler *ch, void *data, int len)
{
- cache_op_ClusterFunction(ch, data, len);
- // We own the message data, free it back to the Cluster subsystem
- clusterProcessor.free_remote_data((char *) data, len);
+// Stub: no longer forwards to cache_op_ClusterFunction() (which now takes a
+// ClusterSession) and no longer releases 'data' through
+// clusterProcessor.free_remote_data(); all three arguments are ignored.
+ (void) ch;
+ (void) data;
+ (void) len;
+ return;
}
+//struct HeadData
+//{
+// int32_t magic; // feedbabe
+// int32_t h_len;
+// int32_t d_len;
+// uint32_t flags;
+//
+// char *hdr() {
+// return (char *)this + sizeof(HeadData);
+// }
+//
+// int32_t hdr_len() {
+// return h_len;
+// }
+//
+// int32_t data_len() {
+// return d_len;
+// }
+//
+// char *data() {
+// return (char *)this + hdr_len + sizeof(HeadData);
+// }
+//};
+
int
-CacheContinuation::setupVCdataRead(int event, VConnection * vc)
+CacheContinuation::setupVCdataRead(int event, void *data)
{
ink_assert(magicno == (int) MagicNo);
//
// Setup the initial data read for the given Cache VC.
// This data is sent back in the response message.
//
+ if (event > CLUSTER_MSG_START && event <= CLUSTER_INTERNEL_ERROR) {
+ Debug("cache_proto", "replyOpEvent: freeing this=%p", this);
+ ink_assert(cluster_close_session(cs));
+ free_CacheCont(this);
+ return EVENT_DONE;
+ }
+
+ pending_action = NULL;
+ result = (event == CACHE_EVENT_OPEN_READ ? CACHE_EVENT_OPEN_READ : CACHE_EVENT_OPEN_READ_FAILED);
+
if (event == CACHE_EVENT_OPEN_READ) {
//////////////////////////////////////////
// Allocate buffer and initiate read.
//////////////////////////////////////////
Debug("cache_proto", "setupVCdataRead CACHE_EVENT_OPEN_READ seqno=%d", seq_number);
- ink_release_assert(c
<TRUNCATED>