[Devel] [PATCH RHEL7 COMMIT] bc: Rip old network buffers and sockets accounting
Konstantin Khorenko
khorenko at virtuozzo.com
Fri Jun 5 12:55:24 PDT 2015
The commit is pushed to "branch-rh7-3.10.0-123.1.2-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-123.1.2.vz7.5.10
------>
commit cf2ef92ce42fa3f21df70b3906842169eb21a885
Author: Pavel Emelyanov <xemul at parallels.com>
Date: Fri Jun 5 23:55:13 2015 +0400
bc: Rip old network buffers and sockets accounting
The previous BC approach was based on two ideas. The first was
that we know the maximum number of sockets and their buffers a
container needs. The second was about "poll semantics", i.e.
when poll() reports "writable", the write/send system call
must not block on hitting a buffer limit. To address the latter,
the BC networking code maintained a poll-reserve value on each
socket.
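For illustration only -- a minimal user-space sketch of that
poll-reserve scheme, with hypothetical simplified names and the
locking elided; the real helpers being removed below are
ub_sock_make_wreserv()/ub_sock_get_wreserv() in kernel/bc/net.c:

#include <errno.h>

struct bc_sock {
	unsigned long poll_reserv;	/* space pre-charged for future sends */
	unsigned long held, limit;	/* beancounter state (lock elided) */
};

/* poll(): report "writable" only after pre-charging the space */
static int bc_poll_writable(struct bc_sock *bs, unsigned long size)
{
	if (bs->poll_reserv >= size)
		return 1;			/* already reserved */
	if (bs->held + (size - bs->poll_reserv) > bs->limit)
		return 0;			/* would exceed the limit */
	bs->held += size - bs->poll_reserv;	/* charge the difference */
	bs->poll_reserv = size;
	return 1;
}

/* send(): consume the reserve, so it cannot block on the limit */
static int bc_send_charge(struct bc_sock *bs, unsigned long size)
{
	if (bs->poll_reserv < size)
		return -ENOMEM;		/* poll() did not promise this much */
	bs->poll_reserv -= size;
	return 0;
}

int main(void)
{
	struct bc_sock bs = { .poll_reserv = 0, .held = 0, .limit = 4096 };

	if (bc_poll_writable(&bs, 2048))	/* poll() says writable */
		bc_send_charge(&bs, 2048);	/* so send cannot fail now */
	return 0;
}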
None of this holds for the VSwap approach, where we have only
one mandatory limit -- the total container memory. With it, the
natural way is to account all the memory allocated by a container
into one counter and balance (if required) all the types of
memory within it without requiring admin attention.
To achieve this we have the kmem accounting patches, which charge
most of the in-kernel allocations into a separate counter and add
it into the total CT memory. The sockets themselves are thus
already accounted in kmem; network buffers only partially --
outgoing ones are, incoming ones are not. Plus we have the
non-trivial TCP memory management code that takes the available
memory into account when managing window sizes. The next patches
fix the buffer accounting, and they are much easier to read if
the old BC networking code is removed first.
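A side note on the accounting being deleted: the per-skb charge
was the slab-rounded truesize. Below is a user-space sketch of
that rounding, mirroring skb_charge_size_gen() from the removed
include/bc/net.h; the struct sizes are illustrative assumptions,
not the real kernel values:

#include <stdio.h>

/* Illustrative stand-ins for the kernel constants/macros involved;
 * the actual values depend on arch and config. */
#define SMP_CACHE_BYTES		64
#define SKB_DATA_ALIGN(x)	(((x) + SMP_CACHE_BYTES - 1) & \
				 ~(SMP_CACHE_BYTES - 1))
#define SIZEOF_SKB		240	/* sizeof(struct sk_buff), assumed */
#define SIZEOF_SHINFO		320	/* sizeof(skb_shared_info), assumed */

static unsigned long roundup_pow_of_two(unsigned long x)
{
	unsigned long r = 1;

	while (r < x)
		r <<= 1;
	return r;
}

/* Mirrors the removed skb_charge_size_gen(): data is padded to the
 * slab object kmalloc() would actually use, plus the skb head. */
static unsigned long skb_charge_size_gen(unsigned long size)
{
	size = SKB_DATA_ALIGN(size) + SIZEOF_SHINFO;
	return roundup_pow_of_two(size) + SIZEOF_SKB;
}

int main(void)
{
	/* e.g. a 1500-byte frame: aligned to 1536, +320 -> 1856,
	 * rounded up to the 2048 slab, +240 skb head = 2288 charged */
	printf("%lu\n", skb_charge_size_gen(1500));
	return 0;
}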
https://jira.sw.ru/browse/PSBM-33584
khorenko@ changes:
- include beancounter.h into net/netfilter/x_tables.c
- zap old BC accounting in net/dccp/minisocks.c
- remove bc/sock.h from net/netfilter/nf_conntrack_netlink.c
Signed-off-by: Pavel Emelyanov <xemul at parallels.com>
---
include/bc/net.h | 227 -------
include/bc/sock.h | 47 --
include/bc/sock_orphan.h | 97 ---
include/bc/tcp.h | 76 ---
include/linux/skbuff.h | 14 +-
include/net/inet_timewait_sock.h | 4 -
include/net/sock.h | 23 +-
kernel/bc/net.c | 1092 ----------------------------------
net/core/datagram.c | 11 +-
net/core/skbuff.c | 16 -
net/core/sock.c | 48 +-
net/core/stream.c | 16 +-
net/dccp/minisocks.c | 5 +-
net/ipv4/af_inet.c | 11 -
net/ipv4/inet_connection_sock.c | 7 +-
net/ipv4/inet_timewait_sock.c | 25 +-
net/ipv4/proc.c | 2 +-
net/ipv4/tcp.c | 78 +--
net/ipv4/tcp_input.c | 13 -
net/ipv4/tcp_ipv4.c | 2 -
net/ipv4/tcp_minisocks.c | 5 +-
net/ipv4/tcp_output.c | 26 -
net/ipv6/af_inet6.c | 11 -
net/ipv6/tcp_ipv6.c | 2 -
net/netfilter/nf_conntrack_core.c | 1 -
net/netfilter/nf_conntrack_netlink.c | 1 -
net/netfilter/x_tables.c | 1 +
net/netlink/af_netlink.c | 33 +-
net/packet/af_packet.c | 21 -
net/socket.c | 3 -
net/unix/af_unix.c | 31 +-
31 files changed, 44 insertions(+), 1905 deletions(-)
diff --git a/include/bc/net.h b/include/bc/net.h
deleted file mode 100644
index e0fb572..0000000
--- a/include/bc/net.h
+++ /dev/null
@@ -1,227 +0,0 @@
-/*
- * include/bc/net.h
- *
- * Copyright (C) 2005 SWsoft
- * All rights reserved.
- *
- * Licensing governed by "linux/COPYING.SWsoft" file.
- *
- */
-
-#ifndef __BC_NET_H_
-#define __BC_NET_H_
-
-/*
- * UB_NUMXXXSOCK, UB_XXXBUF accounting
- */
-
-#include <bc/sock.h>
-#include <bc/beancounter.h>
-
-#ifdef CONFIG_BEANCOUNTERS
-#undef CONFIG_BEANCOUNTERS
-#define CONFIG_BEANCOUNTERS_WILL_BE_BACK
-#endif
-#undef __BC_DECL_H_
-#undef UB_DECLARE_FUNC
-#undef UB_DECLARE_VOID_FUNC
-#include <bc/decl.h>
-
-#define bid2sid(__bufid) \
- ((__bufid) == UB_TCPSNDBUF ? UB_NUMTCPSOCK : UB_NUMOTHERSOCK)
-
-#define SOCK_MIN_UBCSPACE ((int)((2048 - sizeof(struct skb_shared_info)) & \
- ~(SMP_CACHE_BYTES-1)))
-#define SOCK_MIN_UBCSPACE_CH skb_charge_size(SOCK_MIN_UBCSPACE)
-
-static inline int ub_skb_alloc_bc(struct sk_buff *skb, gfp_t gfp_mask)
-{
-#ifdef CONFIG_BEANCOUNTERS
- memset(skb_bc(skb), 0, sizeof(struct skb_beancounter));
-#endif
- return 0;
-}
-
-static inline void ub_skb_free_bc(struct sk_buff *skb)
-{
-}
-
-#define IS_TCP_SOCK(__family, __type) \
- (((__family) == PF_INET || (__family) == PF_INET6) && (__type) == SOCK_STREAM)
-
-/* number of sockets */
-UB_DECLARE_FUNC(int, ub_sock_charge(struct sock *sk, int family, int type))
-UB_DECLARE_FUNC(int, ub_tcp_sock_charge(struct sock *sk))
-UB_DECLARE_FUNC(int, ub_other_sock_charge(struct sock *sk))
-UB_DECLARE_VOID_FUNC(ub_sock_uncharge(struct sock *sk))
-
-/* management of queue for send space */
-UB_DECLARE_FUNC(long, ub_sock_wait_for_space(struct sock *sk, long timeo,
- unsigned long size))
-UB_DECLARE_FUNC(int, ub_sock_snd_queue_add(struct sock *sk, int resource,
- unsigned long size))
-UB_DECLARE_VOID_FUNC(ub_sock_sndqueuedel(struct sock *sk))
-
-/* send space */
-UB_DECLARE_FUNC(int, ub_sock_make_wreserv(struct sock *sk, int bufid,
- unsigned long size))
-UB_DECLARE_FUNC(int, ub_sock_get_wreserv(struct sock *sk, int bufid,
- unsigned long size))
-UB_DECLARE_VOID_FUNC(ub_sock_ret_wreserv(struct sock *sk, int bufid,
- unsigned long size, unsigned long ressize))
-UB_DECLARE_FUNC(int, ub_sock_tcp_chargesend(struct sock *sk,
- struct sk_buff *skb, enum ub_severity strict))
-UB_DECLARE_FUNC(int, ub_sock_tcp_chargepage(struct sock *sk))
-UB_DECLARE_VOID_FUNC(ub_sock_tcp_detachpage(struct sock *sk))
-
-UB_DECLARE_FUNC(int, ub_nlrcvbuf_charge(struct sk_buff *skb, struct sock *sk))
-
-/* receive space */
-UB_DECLARE_FUNC(int, ub_sockrcvbuf_charge(struct sock *sk, struct sk_buff *skb))
-UB_DECLARE_FUNC(int, ub_sock_tcp_chargerecv(struct sock *sk,
- struct sk_buff *skb, enum ub_severity strict))
-
-/* skb destructor */
-UB_DECLARE_VOID_FUNC(ub_skb_uncharge(struct sk_buff *skb))
-
-static inline int ub_sock_makewres_other(struct sock *sk, unsigned long size)
-{
- return ub_sock_make_wreserv(sk, UB_OTHERSOCKBUF, size);
-}
-
-static inline int ub_sock_makewres_tcp(struct sock *sk, unsigned long size)
-{
- return ub_sock_make_wreserv(sk, UB_TCPSNDBUF, size);
-}
-
-UB_DECLARE_FUNC(int, ub_sock_getwres_other(struct sock *sk,
- unsigned long size))
-
-static inline int ub_sock_getwres_tcp(struct sock *sk, unsigned long size)
-{
- return ub_sock_get_wreserv(sk, UB_TCPSNDBUF, size);
-}
-
-UB_DECLARE_VOID_FUNC(ub_sock_retwres_other(struct sock *sk,
- unsigned long size, unsigned long ressize))
-
-static inline void ub_sock_retwres_tcp(struct sock *sk, unsigned long size,
- unsigned long ressize)
-{
- ub_sock_ret_wreserv(sk, UB_TCPSNDBUF, size, ressize);
-}
-
-static inline int ub_sock_sndqueueadd_other(struct sock *sk, unsigned long sz)
-{
- return ub_sock_snd_queue_add(sk, UB_OTHERSOCKBUF, sz);
-}
-
-static inline int ub_sock_sndqueueadd_tcp(struct sock *sk, unsigned long sz)
-{
- return ub_sock_snd_queue_add(sk, UB_TCPSNDBUF, sz);
-}
-
-static inline int ub_tcpsndbuf_charge(struct sock *sk,
- struct sk_buff *skb)
-{
- return ub_sock_tcp_chargesend(sk, skb, UB_HARD);
-}
-
-static inline int ub_tcpsndbuf_charge_forced(struct sock *sk,
- struct sk_buff *skb)
-{
- return ub_sock_tcp_chargesend(sk, skb, UB_FORCE);
-}
-
-static inline int ub_tcprcvbuf_charge(struct sock *sk, struct sk_buff *skb)
-{
- return ub_sock_tcp_chargerecv(sk, skb, UB_SOFT);
-}
-
-static inline int ub_tcprcvbuf_charge_forced(struct sock *sk,
- struct sk_buff *skb)
-{
- return ub_sock_tcp_chargerecv(sk, skb, UB_FORCE);
-}
-
-/* Charge size */
-static inline unsigned long skb_charge_datalen(unsigned long chargesize)
-{
-#ifdef CONFIG_BEANCOUNTERS
- unsigned long slabsize;
-
- chargesize -= sizeof(struct sk_buff);
- slabsize = 64;
- do {
- slabsize <<= 1;
- } while (slabsize <= chargesize);
-
- slabsize >>= 1;
- return (slabsize - sizeof(struct skb_shared_info)) &
- ~(SMP_CACHE_BYTES-1);
-#else
- return 0;
-#endif
-}
-
-static inline unsigned long skb_charge_size_gen(unsigned long size)
-{
-#ifdef CONFIG_BEANCOUNTERS
- unsigned long slabsize;
-
- size = SKB_DATA_ALIGN(size) + sizeof(struct skb_shared_info);
- slabsize = roundup_pow_of_two(size);
-
- return slabsize + sizeof(struct sk_buff);
-#else
- return 0;
-#endif
-
-}
-
-static inline unsigned long skb_charge_size_const(unsigned long size)
-{
-#ifdef CONFIG_BEANCOUNTERS
- unsigned int ret;
- if (SKB_DATA_ALIGN(size) + sizeof(struct skb_shared_info) <= 64)
- ret = 64 + sizeof(struct sk_buff);
- else if (SKB_DATA_ALIGN(size) + sizeof(struct skb_shared_info) <= 128)
- ret = 128 + sizeof(struct sk_buff);
- else if (SKB_DATA_ALIGN(size) + sizeof(struct skb_shared_info) <= 256)
- ret = 256 + sizeof(struct sk_buff);
- else if (SKB_DATA_ALIGN(size) + sizeof(struct skb_shared_info) <= 512)
- ret = 512 + sizeof(struct sk_buff);
- else if (SKB_DATA_ALIGN(size) + sizeof(struct skb_shared_info) <= 1024)
- ret = 1024 + sizeof(struct sk_buff);
- else if (SKB_DATA_ALIGN(size) + sizeof(struct skb_shared_info) <= 2048)
- ret = 2048 + sizeof(struct sk_buff);
- else if (SKB_DATA_ALIGN(size) + sizeof(struct skb_shared_info) <= 4096)
- ret = 4096 + sizeof(struct sk_buff);
- else
- ret = skb_charge_size_gen(size);
- return ret;
-#else
- return 0;
-#endif
-}
-
-
-#define skb_charge_size(__size) \
- (__builtin_constant_p(__size) ? \
- skb_charge_size_const(__size) : \
- skb_charge_size_gen(__size))
-
-UB_DECLARE_FUNC(int, skb_charge_fullsize(struct sk_buff *skb))
-UB_DECLARE_VOID_FUNC(ub_skb_set_charge(struct sk_buff *skb,
- struct sock *sk, unsigned long size, int res))
-UB_DECLARE_FUNC(int, __ub_too_many_orphans(struct sock *sk, int count))
-
-#ifdef CONFIG_BEANCOUNTERS_WILL_BE_BACK
-#define CONFIG_BEANCOUNTERS 1
-#undef CONFIG_BEANCOUNTERS_WILL_BE_BACK
-#endif
-#undef __BC_DECL_H_
-#undef UB_DECLARE_FUNC
-#undef UB_DECLARE_VOID_FUNC
-
-#endif
diff --git a/include/bc/sock.h b/include/bc/sock.h
deleted file mode 100644
index b314c9b..0000000
--- a/include/bc/sock.h
+++ /dev/null
@@ -1,47 +0,0 @@
-/*
- * include/bc/sock.h
- *
- * Copyright (C) 2005 SWsoft
- * All rights reserved.
- *
- * Licensing governed by "linux/COPYING.SWsoft" file.
- *
- */
-
-#ifndef __BC_SOCK_H_
-#define __BC_SOCK_H_
-
-#include <bc/task.h>
-
-struct sock;
-struct sk_buff;
-
-struct skb_beancounter {
- struct user_beancounter *ub;
- unsigned long charged:27, resource:5;
-};
-
-struct sock_beancounter {
- struct user_beancounter *ub;
- /*
- * poll_reserv accounts space already charged for future sends.
- * It is required to make poll agree with sendmsg.
- * Additionally, it makes real charges (with taking bc spinlock)
- * in the send path rarer, speeding networking up.
- * For TCP (only): changes are protected by socket lock (not bc!)
- * For all proto: may be read without serialization in poll.
- */
- unsigned long poll_reserv;
- unsigned long forw_space;
- /* fields below are protected by bc spinlock */
- unsigned long ub_waitspc; /* space waiting for */
- unsigned long ub_wcharged;
- struct list_head ub_sock_list;
-};
-
-#define sock_bc(__sk) (&(__sk)->sk_bc)
-#define skb_bc(__skb) (&(__skb)->skb_bc)
-#define skbc_sock(__skbc) (container_of(__skbc, struct sock, sk_bc))
-#define sock_has_ubc(__sk) (sock_bc(__sk)->ub != NULL)
-
-#endif
diff --git a/include/bc/sock_orphan.h b/include/bc/sock_orphan.h
deleted file mode 100644
index a737d9e..0000000
--- a/include/bc/sock_orphan.h
+++ /dev/null
@@ -1,97 +0,0 @@
-/*
- * include/bc/sock_orphan.h
- *
- * Copyright (C) 2005 SWsoft
- * All rights reserved.
- *
- * Licensing governed by "linux/COPYING.SWsoft" file.
- *
- */
-
-#ifndef __BC_SOCK_ORPHAN_H_
-#define __BC_SOCK_ORPHAN_H_
-
-#include <net/tcp.h>
-
-#include "bc/beancounter.h"
-#include "bc/net.h"
-
-
-static inline struct percpu_counter *__ub_get_orphan_count_ptr(struct sock *sk)
-{
- if (sock_has_ubc(sk))
- return &sock_bc(sk)->ub->ub_orphan_count;
- return sk->sk_prot->orphan_count;
-}
-
-static inline void ub_inc_orphan_count(struct sock *sk)
-{
- percpu_counter_inc(__ub_get_orphan_count_ptr(sk));
-}
-
-static inline void ub_dec_orphan_count(struct sock *sk)
-{
- percpu_counter_dec(__ub_get_orphan_count_ptr(sk));
-}
-
-static inline int ub_get_orphan_count(struct sock *sk)
-{
- return percpu_counter_sum_positive(__ub_get_orphan_count_ptr(sk));
-}
-
-static inline int ub_too_many_orphans(struct sock *sk, int count)
-{
-#ifdef CONFIG_BEANCOUNTERS
- if (__ub_too_many_orphans(sk, count))
- return 1;
-#endif
- return (ub_get_orphan_count(sk) > sysctl_tcp_max_orphans ||
- (sk->sk_wmem_queued > SOCK_MIN_SNDBUF &&
- atomic_long_read(&tcp_memory_allocated) > sysctl_tcp_mem[2]));
-}
-
-struct inet_timewait_sock;
-
-static inline void ub_timewait_mod(struct inet_timewait_sock *tw, int incdec)
-{
-#ifdef CONFIG_BEANCOUNTERS
- tw->tw_ub->ub_tw_count += incdec;
-#endif
-}
-
-static inline int __ub_timewait_check(struct sock *sk)
-{
-#ifdef CONFIG_BEANCOUNTERS
- struct user_beancounter *ub;
- unsigned long mem_max, mem;
- int tw_count;
-
- ub = sock_bc(sk)->ub;
- if (ub == NULL)
- return 1;
-
- tw_count = ub->ub_tw_count;
- mem_max = sysctl_tcp_max_tw_kmem_fraction *
- ((ub->ub_parms[UB_KMEMSIZE].limit >> 10) + 1);
- mem = sk->sk_prot_creator->twsk_prot->twsk_obj_size;
- mem *= tw_count;
- return tw_count < sysctl_tcp_max_tw_buckets_ub && mem < mem_max;
-#else
- return 1;
-#endif
-}
-
-#define ub_timewait_inc(tw, twdr) do { \
- if ((twdr)->ub_managed) \
- ub_timewait_mod(tw, 1); \
- } while (0)
-
-#define ub_timewait_dec(tw, twdr) do { \
- if ((twdr)->ub_managed) \
- ub_timewait_mod(tw, -1); \
- } while (0)
-
-#define ub_timewait_check(sk, twdr) ((!(twdr)->ub_managed) || \
- __ub_timewait_check(sk))
-
-#endif
diff --git a/include/bc/tcp.h b/include/bc/tcp.h
deleted file mode 100644
index d2bf748..0000000
--- a/include/bc/tcp.h
+++ /dev/null
@@ -1,76 +0,0 @@
-/*
- * include/bc/tcp.h
- *
- * Copyright (C) 2005 SWsoft
- * All rights reserved.
- *
- * Licensing governed by "linux/COPYING.SWsoft" file.
- *
- */
-
-#ifndef __BC_TCP_H_
-#define __BC_TCP_H_
-
-/*
- * UB_NUMXXXSOCK, UB_XXXBUF accounting
- */
-
-#include <bc/sock.h>
-#include <bc/beancounter.h>
-
-static inline void ub_tcp_update_maxadvmss(struct sock *sk)
-{
-#ifdef CONFIG_BEANCOUNTERS
- if (!sock_has_ubc(sk))
- return;
- if (sock_bc(sk)->ub->ub_maxadvmss >= tcp_sk(sk)->advmss)
- return;
-
- sock_bc(sk)->ub->ub_maxadvmss =
- skb_charge_size(MAX_HEADER + sizeof(struct iphdr)
- + sizeof(struct tcphdr) + tcp_sk(sk)->advmss);
-#endif
-}
-
-static inline int ub_tcp_rmem_allows_expand(struct sock *sk)
-{
- if (tcp_memory_pressure)
- return 0;
-#ifdef CONFIG_BEANCOUNTERS
- if (sock_has_ubc(sk)) {
- struct user_beancounter *ub;
-
- ub = sock_bc(sk)->ub;
- if (ub->ub_rmem_pressure == UB_RMEM_EXPAND)
- return 1;
- if (ub->ub_rmem_pressure == UB_RMEM_SHRINK)
- return 0;
- return sk->sk_rcvbuf <= ub->ub_rmem_thres;
- }
-#endif
- return 1;
-}
-
-static inline int ub_tcp_memory_pressure(struct sock *sk)
-{
- if (tcp_memory_pressure)
- return 1;
-#ifdef CONFIG_BEANCOUNTERS
- if (sock_has_ubc(sk))
- return sock_bc(sk)->ub->ub_rmem_pressure != UB_RMEM_EXPAND;
-#endif
- return 0;
-}
-
-static inline int ub_tcp_shrink_rcvbuf(struct sock *sk)
-{
- if (tcp_memory_pressure)
- return 1;
-#ifdef CONFIG_BEANCOUNTERS
- if (sock_has_ubc(sk))
- return sock_bc(sk)->ub->ub_rmem_pressure == UB_RMEM_SHRINK;
-#endif
- return 0;
-}
-
-#endif
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index ae20f57..90934a4 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -405,8 +405,6 @@ typedef unsigned char *sk_buff_data_t;
* @users: User count - see {datagram,tcp}.c
*/
-#include <bc/sock.h>
-
struct sk_buff {
/* These two members must be first. */
struct sk_buff *next;
@@ -546,7 +544,6 @@ struct sk_buff {
*data;
unsigned int truesize;
atomic_t users;
- struct skb_beancounter skb_bc;
};
#ifdef __KERNEL__
@@ -554,7 +551,6 @@ struct sk_buff {
* Handling routines are only of interest to the kernel
*/
#include <linux/slab.h>
-#include <bc/net.h>
#define SKB_ALLOC_FCLONE 0x01
@@ -1873,7 +1869,7 @@ static inline void pskb_trim_unique(struct sk_buff *skb, unsigned int len)
* destructor function and make the @skb unowned. The buffer continues
* to exist but is no longer charged to its former owner.
*/
-static inline void __skb_orphan(struct sk_buff *skb)
+static inline void skb_orphan(struct sk_buff *skb)
{
if (skb->destructor)
skb->destructor(skb);
@@ -1897,14 +1893,6 @@ static inline int skb_orphan_frags(struct sk_buff *skb, gfp_t gfp_mask)
return skb_copy_ubufs(skb, gfp_mask);
}
-static inline void skb_orphan(struct sk_buff *skb)
-{
- if (skb->sk)
- ub_skb_uncharge(skb);
-
- __skb_orphan(skb);
-}
-
/**
* __skb_queue_purge - empty a list
* @list: list to empty
diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h
index 057e40a..71c6e26 100644
--- a/include/net/inet_timewait_sock.h
+++ b/include/net/inet_timewait_sock.h
@@ -86,7 +86,6 @@ struct inet_timewait_death_row {
struct inet_hashinfo *hashinfo;
int sysctl_tw_recycle;
int sysctl_max_tw_buckets;
- int ub_managed;
};
void inet_twdr_hangman(unsigned long data);
@@ -141,9 +140,6 @@ struct inet_timewait_sock {
u32 tw_ttd;
struct inet_bind_bucket *tw_tb;
struct hlist_node tw_death_node;
-#ifdef CONFIG_BEANCOUNTERS
- struct user_beancounter *tw_ub;
-#endif
};
#define tw_tclass tw_tos
diff --git a/include/net/sock.h b/include/net/sock.h
index f3c352e..0688f4e 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -438,7 +438,6 @@ struct sock {
void (*rh_reserved6)(void);
void (*rh_reserved7)(void);
void (*rh_reserved8)(void);
- struct sock_beancounter sk_bc;
};
#define __sk_user_data(sk) ((*((void __rcu **)&(sk)->sk_user_data)))
@@ -446,8 +445,6 @@ struct sock {
#define rcu_dereference_sk_user_data(sk) rcu_dereference(__sk_user_data((sk)))
#define rcu_assign_sk_user_data(sk, ptr) rcu_assign_pointer(__sk_user_data((sk)), ptr)
-#include <bc/net.h>
-
/*
* SK_CAN_REUSE and SK_NO_REUSE on a socket mean that the socket is OK
* or not whether his port will be reused by someone else. SK_FORCE_REUSE
@@ -898,8 +895,6 @@ static inline void sock_rps_reset_rxhash(struct sock *sk)
})
extern int sk_stream_wait_connect(struct sock *sk, long *timeo_p);
-extern int __sk_stream_wait_memory(struct sock *sk, long *timeo_p,
- unsigned long amount);
extern int sk_stream_wait_memory(struct sock *sk, long *timeo_p);
extern void sk_stream_wait_close(struct sock *sk, long timeo_p);
extern int sk_stream_error(struct sock *sk, int flags, int err);
@@ -1427,12 +1422,9 @@ sk_rmem_schedule(struct sock *sk, struct sk_buff *skb, int size)
if (!sk_has_account(sk))
return true;
- if (!(size <= sk->sk_forward_alloc ||
+ return size <= sk->sk_forward_alloc ||
__sk_mem_schedule(sk, size, SK_MEM_RECV) ||
- skb_pfmemalloc(skb)))
- return false;
-
- return !ub_sockrcvbuf_charge(sk, skb);
+ skb_pfmemalloc(skb);
}
static inline void sk_mem_reclaim(struct sock *sk)
@@ -1575,11 +1567,6 @@ extern struct sk_buff *sock_alloc_send_pskb(struct sock *sk,
unsigned long data_len,
int noblock,
int *errcode);
-extern struct sk_buff *sock_alloc_send_skb2(struct sock *sk,
- unsigned long size,
- unsigned long size2,
- int noblock,
- int *errcode);
extern void *sock_kmalloc(struct sock *sk, int size,
gfp_t priority);
extern void sock_kfree_s(struct sock *sk, void *mem, int size);
@@ -2043,8 +2030,7 @@ static inline void sock_poll_wait(struct file *filp,
static inline void skb_set_owner_w(struct sk_buff *skb, struct sock *sk)
{
- WARN_ON(skb->destructor);
- __skb_orphan(skb);
+ skb_orphan(skb);
skb->sk = sk;
skb->destructor = sock_wfree;
/*
@@ -2057,8 +2043,7 @@ static inline void skb_set_owner_w(struct sk_buff *skb, struct sock *sk)
static inline void skb_set_owner_r(struct sk_buff *skb, struct sock *sk)
{
- WARN_ON(skb->destructor);
- __skb_orphan(skb);
+ skb_orphan(skb);
skb->sk = sk;
skb->destructor = sock_rfree;
atomic_add(skb->truesize, &sk->sk_rmem_alloc);
diff --git a/kernel/bc/net.c b/kernel/bc/net.c
deleted file mode 100644
index 9d823314..0000000
--- a/kernel/bc/net.c
+++ /dev/null
@@ -1,1092 +0,0 @@
-/*
- * linux/kernel/bc/net.c
- *
- * Copyright (C) 1998-2004 Andrey V. Savochkin <saw at saw.sw.com.sg>
- * Copyright (C) 2005 SWsoft
- * All rights reserved.
- *
- * Licensing governed by "linux/COPYING.SWsoft" file.
- *
- * TODO:
- * - sizeof(struct inode) charge
- * = tcp_mem_schedule() feedback based on ub limits
- * + measures so that one socket won't exhaust all send buffers,
- * see bug in bugzilla
- * = sk->socket check for NULL in snd_wakeups
- * (tcp_write_space checks for NULL itself)
- * + in tcp_close(), orphaned socket abortion should be based on ubc
- * resources (same in tcp_out_of_resources)
- * Beancounter should also have separate orphaned socket counter...
- * + for rcv, in-order segment should be accepted
- * if only barrier is exceeded
- * = tcp_rmem_schedule() feedback based on ub limits
- * - repair forward_alloc mechanism for receive buffers
- * It's idea is that some buffer space is pre-charged so that receive fast
- * path doesn't need to take spinlocks and do other heavy stuff
- * + tcp_prune_queue actions based on ub limits
- * + window adjustments depending on available buffers for receive
- * - window adjustments depending on available buffers for send
- * + race around usewreserv
- * + avoid allocating new page for each tiny-gram, see letter from ANK
- * + rename ub_sock_lock
- * + sk->sleep wait queue probably can be used for all wakeups, and
- * sk->ub_wait is unnecessary
- * + for UNIX sockets, the current algorithm will lead to
- * UB_UNIX_MINBUF-sized messages only for non-blocking case
- * - charge for af_packet sockets
- * + all datagram sockets should be charged to NUMUNIXSOCK
- * - we do not charge for skb copies and clones staying in device queues
- * + live-lock if number of sockets is big and buffer limits are small
- * [diff-ubc-dbllim3]
- * - check that multiple readers/writers on the same socket won't cause fatal
- * consequences
- * - check allocation/charge orders
- * + There is potential problem with callback_lock. In *snd_wakeup we take
- * beancounter first, in sock_def_error_report - callback_lock first.
- * then beancounter. This is not a problem if callback_lock taken
- * readonly, but anyway...
- * - SKB_CHARGE_SIZE doesn't include the space wasted by slab allocator
- * General kernel problems:
- * - in tcp_sendmsg(), if allocation fails, non-blocking sockets with ASYNC
- * notification won't get signals
- * - datagram_poll looks racy
- *
- */
-
-#include <linux/net.h>
-#include <linux/slab.h>
-#include <linux/gfp.h>
-#include <linux/err.h>
-#include <linux/socket.h>
-#include <linux/module.h>
-#include <linux/sched.h>
-
-#include <net/sock.h>
-#include <net/tcp.h>
-
-#include <bc/beancounter.h>
-#include <bc/net.h>
-#include <bc/debug.h>
-
-/* by some reason it is not used currently */
-#define UB_SOCK_MAINTAIN_WMEMPRESSURE 0
-
-
-/* Skb truesize definition. Bad place. Den */
-
-static inline int skb_chargesize_head(struct sk_buff *skb)
-{
- return skb_charge_size(skb_end_pointer(skb) - skb->head +
- sizeof(struct skb_shared_info));
-}
-
-int skb_charge_fullsize(struct sk_buff *skb)
-{
- int chargesize;
- struct sk_buff *skbfrag;
-
- chargesize = skb_chargesize_head(skb) +
- PAGE_SIZE * skb_shinfo(skb)->nr_frags;
- if (likely(skb_shinfo(skb)->frag_list == NULL))
- return chargesize;
- for (skbfrag = skb_shinfo(skb)->frag_list;
- skbfrag != NULL;
- skbfrag = skbfrag->next) {
- chargesize += skb_charge_fullsize(skbfrag);
- }
- return chargesize;
-}
-EXPORT_SYMBOL(skb_charge_fullsize);
-
-static int ub_sock_makewreserv_locked(struct sock *sk,
- int bufid, unsigned long size);
-
-int __ub_too_many_orphans(struct sock *sk, int count)
-{
-
- struct ubparm *ub_sock;
-
- ub_sock = &sock_bc(sk)->ub->ub_parms[UB_NUMTCPSOCK];
- if (sock_has_ubc(sk) && (count >= ub_sock->barrier >> 2))
- return 1;
- return 0;
-}
-
-/*
- * Queueing
- */
-
-static void ub_sock_snd_wakeup(struct user_beancounter *ub)
-{
- struct list_head *p;
- struct sock *sk;
- struct sock_beancounter *skbc;
- struct socket *sock;
-
- while (!list_empty(&ub->ub_other_sk_list)) {
- p = ub->ub_other_sk_list.next;
- skbc = list_entry(p, struct sock_beancounter, ub_sock_list);
- sk = skbc_sock(skbc);
-
- sock = sk->sk_socket;
- if (sock == NULL) {
- /* sk being destroyed */
- list_del_init(&skbc->ub_sock_list);
- continue;
- }
-
- ub_debug(UBD_NET_SLEEP,
- "Checking queue, waiting %lu, reserv %lu\n",
- skbc->ub_waitspc, skbc->poll_reserv);
- if (ub_sock_makewreserv_locked(sk, UB_OTHERSOCKBUF,
- skbc->ub_waitspc))
- break;
-
- list_del_init(&skbc->ub_sock_list);
-
- /*
- * See comments in ub_tcp_snd_wakeup.
- * Locking note: both unix_write_space and
- * sock_def_write_space take callback_lock themselves.
- * We take it here just to be on the safe side and to
- * act the same way as ub_tcp_snd_wakeup does.
- */
- sock_hold(sk);
- spin_unlock(&ub->ub_lock);
-
- read_lock(&sk->sk_callback_lock);
- sk->sk_write_space(sk);
- read_unlock(&sk->sk_callback_lock);
-
- sock_put(sk);
-
- spin_lock(&ub->ub_lock);
- }
-}
-
-static void ub_tcp_snd_wakeup(struct user_beancounter *ub)
-{
- struct list_head *p;
- struct sock *sk;
- struct sock_beancounter *skbc;
- struct socket *sock;
-
- while (!list_empty(&ub->ub_tcp_sk_list)) {
- p = ub->ub_tcp_sk_list.next;
- skbc = list_entry(p, struct sock_beancounter, ub_sock_list);
- sk = skbc_sock(skbc);
-
- sock = sk->sk_socket;
- if (sock == NULL) {
- /* sk being destroyed */
- list_del_init(&skbc->ub_sock_list);
- continue;
- }
-
- ub_debug(UBD_NET_SLEEP,
- "Checking queue, waiting %lu, reserv %lu\n",
- skbc->ub_waitspc, skbc->poll_reserv);
- if (ub_sock_makewreserv_locked(sk, UB_TCPSNDBUF,
- skbc->ub_waitspc))
- break;
-
- list_del_init(&skbc->ub_sock_list);
-
- /*
- * Send async notifications and wake up.
- * Locking note: we get callback_lock here because
- * tcp_write_space is over-optimistic about calling context
- * (socket lock is presumed). So we get the lock here although
- * it belongs to the callback.
- */
- sock_hold(sk);
- spin_unlock(&ub->ub_lock);
-
- read_lock(&sk->sk_callback_lock);
- sk->sk_write_space(sk);
- read_unlock(&sk->sk_callback_lock);
-
- sock_put(sk);
-
- spin_lock(&ub->ub_lock);
- }
-}
-
-int ub_sock_snd_queue_add(struct sock *sk, int res, unsigned long size)
-{
- unsigned long flags;
- struct sock_beancounter *skbc;
- struct user_beancounter *ub;
-
- if (!sock_has_ubc(sk))
- return 0;
-
- skbc = sock_bc(sk);
- ub = skbc->ub;
- spin_lock_irqsave(&ub->ub_lock, flags);
- ub_debug(UBD_NET_SLEEP, "attempt to charge for %lu\n", size);
- if (!ub_sock_makewreserv_locked(sk, res, size)) {
- /*
- * It looks a bit hackish, but it is compatible with both
- * wait_for_xx_ubspace and poll.
- * This __set_current_state is equivalent to a wakeup event
- * right after spin_unlock_irqrestore.
- */
- __set_current_state(TASK_RUNNING);
- spin_unlock_irqrestore(&ub->ub_lock, flags);
- return 0;
- }
-
- ub_debug(UBD_NET_SLEEP, "Adding sk to queue\n");
- skbc->ub_waitspc = size;
- if (!list_empty(&skbc->ub_sock_list)) {
- ub_debug(UBD_NET_SOCKET,
- "re-adding socket to beancounter %p.\n", ub);
- goto out;
- }
-
- switch (res) {
- case UB_TCPSNDBUF:
- list_add_tail(&skbc->ub_sock_list,
- &ub->ub_tcp_sk_list);
- break;
- case UB_OTHERSOCKBUF:
- list_add_tail(&skbc->ub_sock_list,
- &ub->ub_other_sk_list);
- break;
- default:
- BUG();
- }
-out:
- spin_unlock_irqrestore(&ub->ub_lock, flags);
- return -ENOMEM;
-}
-
-EXPORT_SYMBOL(ub_sock_snd_queue_add);
-
-long ub_sock_wait_for_space(struct sock *sk, long timeo, unsigned long size)
-{
- DECLARE_WAITQUEUE(wait, current);
-
- add_wait_queue(sk_sleep(sk), &wait);
- for (;;) {
- if (signal_pending(current))
- break;
- set_current_state(TASK_INTERRUPTIBLE);
- if (!ub_sock_make_wreserv(sk, UB_OTHERSOCKBUF, size))
- break;
-
- if (sk->sk_shutdown & SEND_SHUTDOWN)
- break;
- if (sk->sk_err)
- break;
- ub_sock_snd_queue_add(sk, UB_OTHERSOCKBUF, size);
- timeo = schedule_timeout(timeo);
- }
- __set_current_state(TASK_RUNNING);
- remove_wait_queue(sk_sleep(sk), &wait);
- return timeo;
-}
-
-void ub_sock_sndqueuedel(struct sock *sk)
-{
- struct user_beancounter *ub;
- struct sock_beancounter *skbc;
- unsigned long flags;
-
- if (!sock_has_ubc(sk))
- return;
- skbc = sock_bc(sk);
-
- /* race with write_space callback of other socket */
- ub = skbc->ub;
- spin_lock_irqsave(&ub->ub_lock, flags);
- list_del_init(&skbc->ub_sock_list);
- spin_unlock_irqrestore(&ub->ub_lock, flags);
-}
-
-/*
- * Helpers
- */
-
-static inline void __ub_skb_set_charge(struct sk_buff *skb, struct sock *sk,
- unsigned long size, int resource)
-{
- WARN_ON_ONCE(skb_bc(skb)->ub != NULL);
-
- skb_bc(skb)->ub = sock_bc(sk)->ub;
- skb_bc(skb)->charged = size;
- skb_bc(skb)->resource = resource;
-
- /* Ugly. Ugly. Skb in sk writequeue can live without ref to sk */
- if (skb->sk == NULL)
- skb->sk = sk;
-}
-
-void ub_skb_set_charge(struct sk_buff *skb, struct sock *sk,
- unsigned long size, int resource)
-{
- if (!sock_has_ubc(sk))
- return;
-
- if (sock_bc(sk)->ub == NULL)
- BUG();
-
- __ub_skb_set_charge(skb, sk, size, resource);
-}
-
-EXPORT_SYMBOL(ub_skb_set_charge);
-
-static inline void ub_skb_set_uncharge(struct sk_buff *skb)
-{
- skb_bc(skb)->ub = NULL;
- skb_bc(skb)->charged = 0;
- skb_bc(skb)->resource = 0;
-}
-
-static void ub_update_rmem_thres(struct sock_beancounter *skub)
-{
- struct user_beancounter *ub;
-
- if (skub && skub->ub) {
- ub = skub->ub;
- ub->ub_rmem_thres = ub->ub_parms[UB_TCPRCVBUF].barrier /
- (ub->ub_parms[UB_NUMTCPSOCK].held + 1);
- }
-}
-
-static inline void ub_sock_wcharge_dec(struct sock *sk,
- unsigned long chargesize)
-{
- /* The check sk->sk_family != PF_NETLINK is made as the skb is
- * queued to the kernel end of socket while changed to the user one.
- * Den */
- if (unlikely(sock_bc(sk)->ub_wcharged) && sk->sk_family != PF_NETLINK) {
- if (sock_bc(sk)->ub_wcharged > chargesize)
- sock_bc(sk)->ub_wcharged -= chargesize;
- else
- sock_bc(sk)->ub_wcharged = 0;
- }
-}
-
-/*
- * Charge socket number
- */
-
-static inline void sk_alloc_beancounter(struct sock *sk)
-{
- struct sock_beancounter *skbc;
-
- skbc = sock_bc(sk);
- memset(skbc, 0, sizeof(struct sock_beancounter));
-}
-
-static inline void sk_free_beancounter(struct sock *sk)
-{
-}
-
-static int __sock_charge(struct sock *sk, int res)
-{
- struct sock_beancounter *skbc;
- struct user_beancounter *ub;
- unsigned long added_reserv, added_forw;
- unsigned long flags;
-
- ub = get_exec_ub();
- if (unlikely(ub == NULL))
- return 0;
-
- sk_alloc_beancounter(sk);
- skbc = sock_bc(sk);
- INIT_LIST_HEAD(&skbc->ub_sock_list);
-
- spin_lock_irqsave(&ub->ub_lock, flags);
- if (unlikely(__charge_beancounter_locked(ub, res, 1, UB_HARD) < 0))
- goto out_limit;
-
- added_reserv = 0;
- added_forw = 0;
- if (res == UB_NUMTCPSOCK) {
- added_reserv = skb_charge_size(MAX_TCP_HEADER +
- 1500 - sizeof(struct iphdr) -
- sizeof(struct tcphdr));
- added_reserv *= 4;
- ub->ub_parms[UB_TCPSNDBUF].held += added_reserv;
- if (!ub_barrier_farsz(ub, UB_TCPSNDBUF)) {
- ub->ub_parms[UB_TCPSNDBUF].held -= added_reserv;
- added_reserv = 0;
- }
- skbc->poll_reserv = added_reserv;
- ub_adjust_maxheld(ub, UB_TCPSNDBUF);
-
- added_forw = SK_MEM_QUANTUM * 4;
- ub->ub_parms[UB_TCPRCVBUF].held += added_forw;
- if (!ub_barrier_farsz(ub, UB_TCPRCVBUF)) {
- ub->ub_parms[UB_TCPRCVBUF].held -= added_forw;
- added_forw = 0;
- }
- skbc->forw_space = added_forw;
- ub_adjust_maxheld(ub, UB_TCPRCVBUF);
- }
- spin_unlock_irqrestore(&ub->ub_lock, flags);
-
- skbc->ub = get_beancounter(ub);
- return 0;
-
-out_limit:
- spin_unlock_irqrestore(&ub->ub_lock, flags);
- sk_free_beancounter(sk);
- return -ENOMEM;
-}
-
-int ub_tcp_sock_charge(struct sock *sk)
-{
- int ret;
-
- ret = __sock_charge(sk, UB_NUMTCPSOCK);
- ub_update_rmem_thres(sock_bc(sk));
-
- return ret;
-}
-
-int ub_other_sock_charge(struct sock *sk)
-{
- return __sock_charge(sk, UB_NUMOTHERSOCK);
-}
-
-EXPORT_SYMBOL(ub_other_sock_charge);
-
-int ub_sock_charge(struct sock *sk, int family, int type)
-{
- return (IS_TCP_SOCK(family, type) ?
- ub_tcp_sock_charge(sk) : ub_other_sock_charge(sk));
-}
-
-EXPORT_SYMBOL(ub_sock_charge);
-
-/*
- * Uncharge socket number
- */
-
-void ub_sock_uncharge(struct sock *sk)
-{
- int is_tcp_sock;
- unsigned long flags;
- struct sock_beancounter *skbc;
- struct user_beancounter *ub;
- unsigned long reserv, forw;
-
- if (unlikely(!sock_has_ubc(sk)))
- return;
-
- is_tcp_sock = IS_TCP_SOCK(sk->sk_family, sk->sk_type);
- skbc = sock_bc(sk);
- ub_debug(UBD_NET_SOCKET, "Calling ub_sock_uncharge on %p\n", sk);
-
- ub = skbc->ub;
-
- spin_lock_irqsave(&ub->ub_lock, flags);
- if (!list_empty(&skbc->ub_sock_list)) {
- ub_debug(UBD_NET_SOCKET,
- "ub_sock_uncharge: removing from ub(%p) queue.\n",
- skbc);
- list_del_init(&skbc->ub_sock_list);
- }
-
- reserv = skbc->poll_reserv;
- forw = skbc->forw_space;
- __uncharge_beancounter_locked(ub,
- (is_tcp_sock ? UB_TCPSNDBUF : UB_OTHERSOCKBUF),
- reserv);
- if (forw)
- __uncharge_beancounter_locked(ub,
- (is_tcp_sock ? UB_TCPRCVBUF : UB_DGRAMRCVBUF),
- forw);
- __uncharge_beancounter_locked(ub,
- (is_tcp_sock ? UB_NUMTCPSOCK : UB_NUMOTHERSOCK), 1);
-
- ub_sock_wcharge_dec(sk, reserv);
- if (unlikely(skbc->ub_wcharged))
- printk(KERN_WARNING
- "ub_sock_uncharge: wch=%lu for ub %p (%s).\n",
- skbc->ub_wcharged, ub, ub->ub_name);
- skbc->poll_reserv = 0;
- skbc->forw_space = 0;
- spin_unlock_irqrestore(&ub->ub_lock, flags);
-
- put_beancounter(ub);
- sk_free_beancounter(sk);
-}
-
-/*
- * Special case for netlink_dump - (un)charges precalculated size
- */
-
-int ub_nlrcvbuf_charge(struct sk_buff *skb, struct sock *sk)
-{
- int ret;
- unsigned long chargesize;
-
- if (unlikely(!sock_has_ubc(sk)))
- return 0;
-
- chargesize = skb_charge_fullsize(skb);
- ret = charge_beancounter(sock_bc(sk)->ub,
- UB_OTHERSOCKBUF, chargesize, UB_HARD);
- if (ret < 0)
- return ret;
- ub_skb_set_charge(skb, sk, chargesize, UB_OTHERSOCKBUF);
- return ret;
-}
-
-/*
- * Poll reserve accounting
- *
- * This is the core of socket buffer management (along with queueing/wakeup
- * functions. The rest of buffer accounting either call these functions, or
- * repeat parts of their logic for some simpler cases.
- */
-
-static int ub_sock_makewreserv_locked(struct sock *sk,
- int bufid, unsigned long size)
-{
- unsigned long wcharge_added;
- struct sock_beancounter *skbc;
- struct user_beancounter *ub;
-
- skbc = sock_bc(sk);
- if (skbc->poll_reserv >= size) /* no work to be done */
- goto out;
-
- ub = skbc->ub;
- ub->ub_parms[bufid].held += size - skbc->poll_reserv;
-
- wcharge_added = 0;
- /*
- * Logic:
- * 1) when used memory hits barrier, we set wmem_pressure;
- * wmem_pressure is reset under barrier/2;
- * between barrier/2 and barrier we limit per-socket buffer growth;
- * 2) each socket is guaranteed to get (limit-barrier)/maxsockets
- * calculated on the base of memory eaten after the barrier is hit
- */
- skbc = sock_bc(sk);
-#if UB_SOCK_MAINTAIN_WMEMPRESSURE
- if (!ub_hfbarrier_hit(ub, bufid)) {
- if (ub->ub_wmem_pressure)
- ub_debug(UBD_NET_SEND, "makewres: pressure -> 0 "
- "sk %p sz %lu pr %lu hd %lu wc %lu sb %d.\n",
- sk, size, skbc->poll_reserv,
- ub->ub_parms[bufid].held,
- skbc->ub_wcharged, sk->sk_sndbuf);
- ub->ub_wmem_pressure = 0;
- }
-#endif
- if (ub_barrier_hit(ub, bufid)) {
-#if UB_SOCK_MAINTAIN_WMEMPRESSURE
- if (!ub->ub_wmem_pressure)
- ub_debug(UBD_NET_SEND, "makewres: pressure -> 1 "
- "sk %p sz %lu pr %lu hd %lu wc %lu sb %d.\n",
- sk, size, skbc->poll_reserv,
- ub->ub_parms[bufid].held,
- skbc->ub_wcharged, sk->sk_sndbuf);
- ub->ub_wmem_pressure = 1;
-#endif
- if (sk->sk_family == PF_NETLINK)
- goto unroll;
- wcharge_added = size - skbc->poll_reserv;
- skbc->ub_wcharged += wcharge_added;
- if (skbc->ub_wcharged * ub->ub_parms[bid2sid(bufid)].limit +
- ub->ub_parms[bufid].barrier >
- ub->ub_parms[bufid].limit)
- goto unroll_wch;
- }
- if (ub->ub_parms[bufid].held > ub->ub_parms[bufid].limit)
- goto unroll;
-
- ub_adjust_maxheld(ub, bufid);
- skbc->poll_reserv = size;
-out:
- return 0;
-
-unroll_wch:
- skbc->ub_wcharged -= wcharge_added;
-unroll:
- ub_debug(UBD_NET_SEND,
- "makewres: deny "
- "sk %p sz %lu pr %lu hd %lu wc %lu sb %d.\n",
- sk, size, skbc->poll_reserv, ub->ub_parms[bufid].held,
- skbc->ub_wcharged, sk->sk_sndbuf);
- ub->ub_parms[bufid].failcnt++;
- ub->ub_parms[bufid].held -= size - skbc->poll_reserv;
-
- if (sk->sk_socket != NULL) {
- set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
- set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
- }
- return -ENOMEM;
-}
-
-int ub_sock_make_wreserv(struct sock *sk, int bufid, unsigned long size)
-{
- struct sock_beancounter *skbc;
- unsigned long flags;
- int err;
-
- skbc = sock_bc(sk);
-
- /*
- * This function provides that there is sufficient reserve upon return
- * only if sk has only one user. We can check poll_reserv without
- * serialization and avoid locking if the reserve already exists.
- */
- if (unlikely(!sock_has_ubc(sk)) || likely(skbc->poll_reserv >= size))
- return 0;
-
- spin_lock_irqsave(&skbc->ub->ub_lock, flags);
- err = ub_sock_makewreserv_locked(sk, bufid, size);
- spin_unlock_irqrestore(&skbc->ub->ub_lock, flags);
-
- return err;
-}
-
-EXPORT_SYMBOL(ub_sock_make_wreserv);
-
-int ub_sock_get_wreserv(struct sock *sk, int bufid, unsigned long size)
-{
- struct sock_beancounter *skbc;
-
- if (unlikely(!sock_has_ubc(sk)))
- return 0;
-
- /* optimize for the case if socket has sufficient reserve */
- ub_sock_make_wreserv(sk, bufid, size);
- skbc = sock_bc(sk);
- if (likely(skbc->poll_reserv >= size)) {
- skbc->poll_reserv -= size;
- return 0;
- }
- return -ENOMEM;
-}
-
-EXPORT_SYMBOL(ub_sock_get_wreserv);
-
-static void ub_sock_do_ret_wreserv(struct sock *sk, int bufid,
- unsigned long size, unsigned long ressize)
-{
- struct sock_beancounter *skbc;
- struct user_beancounter *ub;
- unsigned long extra;
- unsigned long flags;
-
- skbc = sock_bc(sk);
- ub = skbc->ub;
-
- extra = 0;
- spin_lock_irqsave(&ub->ub_lock, flags);
- skbc->poll_reserv += size;
- if (skbc->poll_reserv > ressize) {
- extra = skbc->poll_reserv - ressize;
- ub_sock_wcharge_dec(sk, extra);
- skbc->poll_reserv = ressize;
-
- __uncharge_beancounter_locked(ub, bufid, extra);
- if (bufid == UB_TCPSNDBUF)
- ub_tcp_snd_wakeup(ub);
- else
- ub_sock_snd_wakeup(ub);
- }
- spin_unlock_irqrestore(&ub->ub_lock, flags);
-}
-
-void ub_sock_ret_wreserv(struct sock *sk, int bufid,
- unsigned long size, unsigned long ressize)
-{
- struct sock_beancounter *skbc;
-
- if (unlikely(!sock_has_ubc(sk)))
- return;
-
- skbc = sock_bc(sk);
- /* check if the reserve can be kept */
- if (ub_barrier_farsz(skbc->ub, bufid)) {
- skbc->poll_reserv += size;
- return;
- }
- ub_sock_do_ret_wreserv(sk, bufid, size, ressize);
-}
-
-/*
- * UB_DGRAMRCVBUF
- */
-
-static int ub_dgramrcvbuf_charge(struct sock *sk, struct sk_buff *skb)
-{
- unsigned long chargesize;
-
- chargesize = skb_charge_fullsize(skb);
- if (charge_beancounter(sock_bc(sk)->ub, UB_DGRAMRCVBUF,
- chargesize, UB_HARD))
- return -ENOMEM;
-
- ub_skb_set_charge(skb, sk, chargesize, UB_DGRAMRCVBUF);
- return 0;
-}
-
-int ub_sockrcvbuf_charge(struct sock *sk, struct sk_buff *skb)
-{
- if (unlikely(!sock_has_ubc(sk)))
- return 0;
-
- if (IS_TCP_SOCK(sk->sk_family, sk->sk_type))
- return ub_tcprcvbuf_charge(sk, skb);
- else
- return ub_dgramrcvbuf_charge(sk, skb);
-}
-
-EXPORT_SYMBOL(ub_sockrcvbuf_charge);
-
-static void ub_sockrcvbuf_uncharge(struct sk_buff *skb)
-{
- uncharge_beancounter(skb_bc(skb)->ub, UB_DGRAMRCVBUF,
- skb_bc(skb)->charged);
- ub_skb_set_uncharge(skb);
-}
-
-/*
- * UB_TCPRCVBUF
- */
-
-int ub_sock_tcp_chargerecv(struct sock *sk, struct sk_buff *skb,
- enum ub_severity strict)
-{
- int retval;
- unsigned long flags;
- struct user_beancounter *ub;
- struct sock_beancounter *skbc;
- unsigned long chargesize;
-
- if (unlikely(!sock_has_ubc(sk)))
- return 0;
- skbc = sock_bc(sk);
-
- chargesize = skb_charge_fullsize(skb);
- if (likely(skbc->forw_space >= chargesize)) {
- skbc->forw_space -= chargesize;
- __ub_skb_set_charge(skb, sk, chargesize, UB_TCPRCVBUF);
- return 0;
- }
-
- /*
- * Memory pressure reactions:
- * 1) set UB_RMEM_KEEP (clearing UB_RMEM_EXPAND)
- * 2) set UB_RMEM_SHRINK and tcp_clamp_window()
- * tcp_collapse_queues() if rmem_alloc > rcvbuf
- * 3) drop OFO, tcp_purge_ofo()
- * 4) drop all.
- * Currently, we do #2 and #3 at once (which means that current
- * collapsing of OFO queue in tcp_collapse_queues() is a waste of time,
- * for example...)
- * On memory pressure we jump from #0 to #3, and when the pressure
- * subsides, to #1.
- */
- retval = 0;
- ub = sock_bc(sk)->ub;
- spin_lock_irqsave(&ub->ub_lock, flags);
- ub->ub_parms[UB_TCPRCVBUF].held += chargesize;
- if (ub->ub_parms[UB_TCPRCVBUF].held >
- ub->ub_parms[UB_TCPRCVBUF].barrier &&
- strict != UB_FORCE)
- goto excess;
- ub_adjust_maxheld(ub, UB_TCPRCVBUF);
- spin_unlock_irqrestore(&ub->ub_lock, flags);
-
-out:
- if (retval == 0)
- ub_skb_set_charge(skb, sk, chargesize, UB_TCPRCVBUF);
- return retval;
-
-excess:
- ub->ub_rmem_pressure = UB_RMEM_SHRINK;
- if (strict == UB_HARD)
- retval = -ENOMEM;
- if (ub->ub_parms[UB_TCPRCVBUF].held > ub->ub_parms[UB_TCPRCVBUF].limit)
- retval = -ENOMEM;
- /*
- * We try to leave numsock*maxadvmss as a reserve for sockets not
- * queueing any data yet (if the difference between the barrier and the
- * limit is enough for this reserve).
- */
- if (ub->ub_parms[UB_TCPRCVBUF].held +
- ub->ub_parms[UB_NUMTCPSOCK].limit * ub->ub_maxadvmss
- > ub->ub_parms[UB_TCPRCVBUF].limit &&
- atomic_read(&sk->sk_rmem_alloc))
- retval = -ENOMEM;
- if (retval) {
- ub->ub_parms[UB_TCPRCVBUF].held -= chargesize;
- ub->ub_parms[UB_TCPRCVBUF].failcnt++;
- }
- ub_adjust_maxheld(ub, UB_TCPRCVBUF);
- spin_unlock_irqrestore(&ub->ub_lock, flags);
- goto out;
-}
-EXPORT_SYMBOL(ub_sock_tcp_chargerecv);
-
-static void ub_tcprcvbuf_uncharge(struct sk_buff *skb)
-{
- unsigned long flags;
- unsigned long held, bar;
- int prev_pres;
- struct user_beancounter *ub;
-
- ub = skb_bc(skb)->ub;
- if (ub_barrier_farsz(ub, UB_TCPRCVBUF)) {
- sock_bc(skb->sk)->forw_space += skb_bc(skb)->charged;
- ub_skb_set_uncharge(skb);
- return;
- }
-
- spin_lock_irqsave(&ub->ub_lock, flags);
- if (ub->ub_parms[UB_TCPRCVBUF].held < skb_bc(skb)->charged) {
- printk(KERN_ERR "Uncharging %d for tcprcvbuf of %p with %lu\n",
- skb_bc(skb)->charged,
- ub, ub->ub_parms[UB_TCPRCVBUF].held);
- /* ass-saving bung */
- skb_bc(skb)->charged = ub->ub_parms[UB_TCPRCVBUF].held;
- }
- ub->ub_parms[UB_TCPRCVBUF].held -= skb_bc(skb)->charged;
- held = ub->ub_parms[UB_TCPRCVBUF].held;
- bar = ub->ub_parms[UB_TCPRCVBUF].barrier;
- prev_pres = ub->ub_rmem_pressure;
- if (held <= bar - (bar >> 2))
- ub->ub_rmem_pressure = UB_RMEM_EXPAND;
- else if (held <= bar)
- ub->ub_rmem_pressure = UB_RMEM_KEEP;
- spin_unlock_irqrestore(&ub->ub_lock, flags);
-
- ub_skb_set_uncharge(skb);
-}
-
-
-/*
- * UB_OTHERSOCKBUF and UB_TCPSNDBUF
- */
-
-static void ub_socksndbuf_uncharge(struct sk_buff *skb)
-{
- unsigned long flags;
- struct user_beancounter *ub;
- unsigned long chargesize;
-
- ub = skb_bc(skb)->ub;
- chargesize = skb_bc(skb)->charged;
-
- spin_lock_irqsave(&ub->ub_lock, flags);
- __uncharge_beancounter_locked(ub, UB_OTHERSOCKBUF, chargesize);
- if (skb->sk != NULL && sock_has_ubc(skb->sk))
- ub_sock_wcharge_dec(skb->sk, chargesize);
- ub_sock_snd_wakeup(ub);
- spin_unlock_irqrestore(&ub->ub_lock, flags);
-
- ub_skb_set_uncharge(skb);
-}
-
-/* expected to be called under socket lock */
-static void ub_tcpsndbuf_uncharge(struct sk_buff *skb)
-{
- if (WARN_ON(!skb->sk))
- return;
- /*
- * ub_sock_ret_wreserv call is abused here, we just want to uncharge
- * skb size. However, to reduce duplication of the code doing
- * ub_hfbarrier_hit check, ub_wcharged reduction, and wakeup we call
- * a function that already does all of this. 2006/04/27 SAW
- */
- ub_sock_ret_wreserv(skb->sk, UB_TCPSNDBUF, skb_bc(skb)->charged,
- sock_bc(skb->sk)->poll_reserv);
- ub_skb_set_uncharge(skb);
-}
-
-void ub_skb_uncharge(struct sk_buff *skb)
-{
- switch (skb_bc(skb)->resource) {
- case UB_TCPSNDBUF:
- ub_tcpsndbuf_uncharge(skb);
- break;
- case UB_TCPRCVBUF:
- ub_tcprcvbuf_uncharge(skb);
- break;
- case UB_DGRAMRCVBUF:
- ub_sockrcvbuf_uncharge(skb);
- break;
- case UB_OTHERSOCKBUF:
- ub_socksndbuf_uncharge(skb);
- break;
- }
-}
-
-EXPORT_SYMBOL(ub_skb_uncharge); /* due to skb_orphan()/conntracks */
-
-/*
- * Other sock reserve managment
- */
-
-int ub_sock_getwres_other(struct sock *sk, unsigned long size)
-{
- struct sock_beancounter *skbc;
- struct user_beancounter *ub;
- unsigned long flags;
- int err;
-
- if (unlikely(!sock_has_ubc(sk)))
- return 0;
-
- /*
- * Nothing except beancounter lock protects skbc->poll_reserv.
- * So, take the lock and do the job.
- */
- skbc = sock_bc(sk);
- ub = skbc->ub;
- spin_lock_irqsave(&ub->ub_lock, flags);
- err = ub_sock_makewreserv_locked(sk, UB_OTHERSOCKBUF, size);
- if (!err)
- skbc->poll_reserv -= size;
- spin_unlock_irqrestore(&ub->ub_lock, flags);
-
- return err;
-}
-EXPORT_SYMBOL(ub_sock_getwres_other);
-
-void ub_sock_retwres_other(struct sock *sk,
- unsigned long size, unsigned long ressize)
-{
- if (unlikely(!sock_has_ubc(sk)))
- return;
-
- ub_sock_do_ret_wreserv(sk, UB_OTHERSOCKBUF, size, ressize);
-}
-
-/*
- * TCP send buffers accouting. Paged part
- */
-
-int ub_sock_tcp_chargepage(struct sock *sk)
-{
- struct sock_beancounter *skbc;
- unsigned long extra;
- int err;
-
- if (unlikely(!sock_has_ubc(sk)))
- return 0;
-
- skbc = sock_bc(sk);
- ub_sock_make_wreserv(sk, UB_TCPSNDBUF, PAGE_SIZE);
- if (likely(skbc->poll_reserv >= PAGE_SIZE)) {
- skbc->poll_reserv -= PAGE_SIZE;
- return 0;
- }
-
- /*
- * Ok, full page is not available.
- * However, this function must succeed if poll previously indicated
- * that write is possible. We better make a forced charge here
- * than reserve a whole page in poll.
- */
- err = ub_sock_make_wreserv(sk, UB_TCPSNDBUF, SOCK_MIN_UBCSPACE);
- if (unlikely(err < 0))
- goto out;
- if (skbc->poll_reserv < PAGE_SIZE) {
- extra = PAGE_SIZE - skbc->poll_reserv;
- err = charge_beancounter(skbc->ub, UB_TCPSNDBUF, extra,
- UB_FORCE);
- if (err < 0)
- goto out;
- skbc->poll_reserv += extra;
- }
- skbc->poll_reserv -= PAGE_SIZE;
- return 0;
-
-out:
- return err;
-}
-
-void ub_sock_tcp_detachpage(struct sock *sk)
-{
- struct sk_buff *skb;
-
- if (unlikely(!sock_has_ubc(sk)))
- return;
-
- /* The page is just detached from socket. The last skb in queue
- with paged part holds referrence to it */
- skb = skb_peek_tail(&sk->sk_write_queue);
- if (skb == NULL) {
- /* If the queue is empty - all data is sent and page is about
- to be freed */
- ub_sock_ret_wreserv(sk, UB_TCPSNDBUF, PAGE_SIZE,
- sock_bc(sk)->poll_reserv);
- } else {
- /* Last skb is a good aproximation for a last skb with
- paged part */
- skb_bc(skb)->charged += PAGE_SIZE;
- }
-}
-
-/*
- * TCPSNDBUF charge functions below are called in the following cases:
- * - sending of SYN, SYN-ACK, FIN, the latter charge is forced by
- * some technical reasons in TCP code;
- * - fragmentation of TCP packets.
- * These functions are allowed but not required to use poll_reserv.
- * Originally, these functions didn't do that, since it didn't make
- * any sense. Now, since poll_reserv now has a function of general reserve,
- * they use it.
- */
-int ub_sock_tcp_chargesend(struct sock *sk, struct sk_buff *skb,
- enum ub_severity strict)
-{
- int ret;
- unsigned long chargesize;
- struct sock_beancounter *skbc;
- struct user_beancounter *ub;
- unsigned long flags;
-
- if (unlikely(!sock_has_ubc(sk)))
- return 0;
-
- skbc = sock_bc(sk);
- chargesize = skb_charge_fullsize(skb);
- if (likely(skbc->poll_reserv >= chargesize)) {
- skbc->poll_reserv -= chargesize;
- __ub_skb_set_charge(skb, sk, chargesize, UB_TCPSNDBUF);
- /* XXX hack, see ub_skb_set_charge */
- skb->sk = sk;
- return 0;
- }
-
- ub = skbc->ub;
- spin_lock_irqsave(&ub->ub_lock, flags);
- ret = __charge_beancounter_locked(ub, UB_TCPSNDBUF,
- chargesize, strict);
- /*
- * Note: this check is not equivalent of the corresponding check
- * in makewreserv. It's similar in spirit, but an equivalent check
- * would be too long and complicated here.
- */
- if (!ret && ub_barrier_hit(ub, UB_TCPSNDBUF))
- skbc->ub_wcharged += chargesize;
- spin_unlock_irqrestore(&ub->ub_lock, flags);
- if (likely(!ret))
- ub_skb_set_charge(skb, sk, chargesize, UB_TCPSNDBUF);
- return ret;
-}
-EXPORT_SYMBOL(ub_sock_tcp_chargesend);
-
-/*
- * Initialization
- */
-
-int __init skbc_cache_init(void)
-{
- return 0;
-}
diff --git a/net/core/datagram.c b/net/core/datagram.c
index cfc1c16..8ab48cd 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -754,7 +754,6 @@ unsigned int datagram_poll(struct file *file, struct socket *sock,
{
struct sock *sk = sock->sk;
unsigned int mask;
- int no_ubc_space;
sock_poll_wait(file, sk_sleep(sk), wait);
mask = 0;
@@ -766,14 +765,8 @@ unsigned int datagram_poll(struct file *file, struct socket *sock,
if (sk->sk_shutdown & RCV_SHUTDOWN)
mask |= POLLRDHUP | POLLIN | POLLRDNORM;
- if (sk->sk_shutdown == SHUTDOWN_MASK) {
- no_ubc_space = 0;
+ if (sk->sk_shutdown == SHUTDOWN_MASK)
mask |= POLLHUP;
- } else {
- no_ubc_space = ub_sock_makewres_other(sk, SOCK_MIN_UBCSPACE_CH);
- if (no_ubc_space)
- ub_sock_sndqueueadd_other(sk, SOCK_MIN_UBCSPACE_CH);
- }
/* readable? */
if (!skb_queue_empty(&sk->sk_receive_queue))
@@ -789,7 +782,7 @@ unsigned int datagram_poll(struct file *file, struct socket *sock,
}
/* writable? */
- if (!no_ubc_space && sock_writeable(sk))
+ if (sock_writeable(sk))
mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
else
set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 05011c2..fdc3180 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -244,9 +244,6 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
goto out;
prefetchw(skb);
- if (ub_skb_alloc_bc(skb, gfp_mask & ~__GFP_DMA))
- goto nobc;
-
/* We do our best to align skb_shared_info on a separate cache
* line. It usually works because kmalloc(X > SMP_CACHE_BYTES) gives
* aligned memory blocks, unless SLUB/SLAB debug is enabled.
@@ -302,8 +299,6 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
out:
return skb;
nodata:
- ub_skb_free_bc(skb);
-nobc:
kmem_cache_free(cache, skb);
skb = NULL;
goto out;
@@ -337,11 +332,6 @@ struct sk_buff *build_skb(void *data, unsigned int frag_size)
if (!skb)
return NULL;
- if (ub_skb_alloc_bc(skb, GFP_ATOMIC)) {
- kmem_cache_free(skbuff_head_cache, skb);
- return NULL;
- }
-
size -= SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
memset(skb, 0, offsetof(struct sk_buff, tail));
@@ -552,7 +542,6 @@ static void kfree_skbmem(struct sk_buff *skb)
struct sk_buff *other;
atomic_t *fclone_ref;
- ub_skb_free_bc(skb);
switch (skb->fclone) {
case SKB_FCLONE_UNAVAILABLE:
kmem_cache_free(skbuff_head_cache, skb);
@@ -585,7 +574,6 @@ static void skb_release_head_state(struct sk_buff *skb)
#ifdef CONFIG_XFRM
secpath_put(skb->sp);
#endif
- ub_skb_uncharge(skb);
if (skb->destructor) {
WARN_ON(in_irq());
skb->destructor(skb);
@@ -918,10 +906,6 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask)
n->fclone = SKB_FCLONE_UNAVAILABLE;
}
- if (ub_skb_alloc_bc(n, gfp_mask)) {
- kmem_cache_free(skbuff_head_cache, n);
- return NULL;
- }
return __skb_clone(n, skb);
}
EXPORT_SYMBOL(skb_clone);
diff --git a/net/core/sock.c b/net/core/sock.c
index a96efdd..7b7c804 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -131,7 +131,6 @@
#include <net/cls_cgroup.h>
#include <net/netprio_cgroup.h>
-#include <bc/net.h>
#include <bc/beancounter.h>
#include <linux/filter.h>
@@ -1298,7 +1297,6 @@ static void sk_prot_free(struct proto *prot, struct sock *sk)
slab = prot->slab;
security_sk_free(sk);
- ub_sock_uncharge(sk);
if (slab != NULL)
kmem_cache_free(slab, sk);
else
@@ -1482,11 +1480,15 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
if (filter != NULL)
sk_filter_charge(newsk, filter);
- if (ub_sock_charge(newsk, newsk->sk_family, newsk->sk_type) < 0)
- goto out_err;
-
- if (unlikely(xfrm_sk_clone_policy(newsk)))
- goto out_err;
+ if (unlikely(xfrm_sk_clone_policy(newsk))) {
+ /* It is still raw copy of parent, so invalidate
+ * destructor and make plain sk_free() */
+ newsk->sk_destruct = NULL;
+ bh_unlock_sock(newsk);
+ sk_free(newsk);
+ newsk = NULL;
+ goto out;
+ }
newsk->sk_err = 0;
newsk->sk_priority = 0;
@@ -1520,15 +1522,8 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
if (newsk->sk_flags & SK_FLAGS_TIMESTAMP)
net_enable_timestamp();
}
+out:
return newsk;
-
-out_err:
- /* It is still raw copy of parent, so invalidate
- * destructor and make plain sk_free() */
- sock_reset_flag(newsk, SOCK_TIMESTAMP);
- newsk->sk_destruct = NULL;
- sk_free(newsk);
- return NULL;
}
EXPORT_SYMBOL_GPL(sk_clone_lock);
@@ -1739,7 +1734,6 @@ struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len,
long timeo;
int err;
int npages = (data_len + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
- unsigned long size = header_len + data_len;
err = -EMSGSIZE;
if (npages > MAX_SKB_FRAGS)
@@ -1759,18 +1753,6 @@ struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len,
if (sk->sk_shutdown & SEND_SHUTDOWN)
goto failure;
- if (ub_sock_getwres_other(sk, skb_charge_size(size))) {
- set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
- err = -EAGAIN;
- if (!timeo)
- goto failure;
- if (signal_pending(current))
- goto interrupted;
- timeo = ub_sock_wait_for_space(sk, timeo,
- skb_charge_size(size));
- continue;
- }
-
if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
skb = alloc_skb(header_len, gfp_mask);
if (skb) {
@@ -1805,17 +1787,9 @@ struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len,
break;
}
- ub_sock_retwres_other(sk, skb_charge_size(size),
- SOCK_MIN_UBCSPACE_CH);
-
err = -ENOBUFS;
goto failure;
}
-
- ub_sock_retwres_other(sk,
- skb_charge_size(size),
- SOCK_MIN_UBCSPACE_CH);
-
set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
err = -EAGAIN;
@@ -1826,8 +1800,6 @@ struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len,
timeo = sock_wait_for_wmem(sk, timeo);
}
- ub_skb_set_charge(skb, sk, skb_charge_size(size), UB_OTHERSOCKBUF);
-
skb_set_owner_w(skb, sk);
return skb;
diff --git a/net/core/stream.c b/net/core/stream.c
index d00ce69..f5df85d 100644
--- a/net/core/stream.c
+++ b/net/core/stream.c
@@ -113,10 +113,8 @@ EXPORT_SYMBOL(sk_stream_wait_close);
* sk_stream_wait_memory - Wait for more memory for a socket
* @sk: socket to wait for memory
* @timeo_p: for how long
- * @amount - amount of memory to wait for (in UB space!)
*/
-int __sk_stream_wait_memory(struct sock *sk, long *timeo_p,
- unsigned long amount)
+int sk_stream_wait_memory(struct sock *sk, long *timeo_p)
{
int err = 0;
long vm_wait = 0;
@@ -138,10 +136,7 @@ int __sk_stream_wait_memory(struct sock *sk, long *timeo_p,
if (signal_pending(current))
goto do_interrupted;
clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
- if (amount == 0) {
- if (sk_stream_memory_free(sk) && !vm_wait)
- break;
- } else if (!ub_sock_sndqueueadd_tcp(sk, amount))
+ if (sk_stream_memory_free(sk) && !vm_wait)
break;
set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
@@ -151,8 +146,6 @@ int __sk_stream_wait_memory(struct sock *sk, long *timeo_p,
(sk_stream_memory_free(sk) &&
!vm_wait));
sk->sk_write_pending--;
- if (amount > 0)
- ub_sock_sndqueuedel(sk);
if (vm_wait) {
vm_wait -= current_timeo;
@@ -178,11 +171,6 @@ do_interrupted:
err = sock_intr_errno(*timeo_p);
goto out;
}
-
-int sk_stream_wait_memory(struct sock *sk, long *timeo_p)
-{
- return __sk_stream_wait_memory(sk, timeo_p, 0);
-}
EXPORT_SYMBOL(sk_stream_wait_memory);
int sk_stream_error(struct sock *sk, int flags, int err)
diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c
index 3e9e21c..9e2f78b 100644
--- a/net/dccp/minisocks.c
+++ b/net/dccp/minisocks.c
@@ -20,8 +20,6 @@
#include <net/xfrm.h>
#include <net/inet_timewait_sock.h>
-#include <bc/sock_orphan.h>
-
#include "ackvec.h"
#include "ccid.h"
#include "dccp.h"
@@ -49,8 +47,7 @@ void dccp_time_wait(struct sock *sk, int state, int timeo)
{
struct inet_timewait_sock *tw = NULL;
- if (dccp_death_row.tw_count < dccp_death_row.sysctl_max_tw_buckets &&
- ub_timewait_check(sk, &dccp_death_row))
+ if (dccp_death_row.tw_count < dccp_death_row.sysctl_max_tw_buckets)
tw = inet_twsk_alloc(sk, state);
if (tw != NULL) {
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 80b140d..17314cd 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -118,7 +118,6 @@
#ifdef CONFIG_IP_MROUTE
#include <linux/mroute.h>
#endif
-#include <bc/net.h>
/* The inetsw table contains everything that inet_create needs to
@@ -358,13 +357,6 @@ lookup_protocol:
if (sk == NULL)
goto out;
- err = -ENOBUFS;
- if (ub_sock_charge(sk, PF_INET, sock->type))
- goto out_sk_free;
- /* if charge was successful, sock_init_data() MUST be called to
- * set sk->sk_type. otherwise sk will be uncharged to wrong resource
- */
-
err = 0;
sk->sk_no_check = answer_no_check;
if (INET_PROTOSW_REUSE & answer_flags)
@@ -425,9 +417,6 @@ out:
out_rcu_unlock:
rcu_read_unlock();
goto out;
-out_sk_free:
- sk_free(sk);
- return err;
}
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 6ad0a6c..07622a4 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -24,9 +24,6 @@
#include <net/tcp_states.h>
#include <net/xfrm.h>
-#include <bc/net.h>
-#include <bc/sock_orphan.h>
-
#ifdef INET_CSK_DEBUG
const char inet_csk_timer_bug_msg[] = "inet_csk BUG: unknown timer value\n";
EXPORT_SYMBOL(inet_csk_timer_bug_msg);
@@ -729,7 +726,7 @@ void inet_csk_destroy_sock(struct sock *sk)
sk_refcnt_debug_release(sk);
- ub_dec_orphan_count(sk);
+ percpu_counter_dec(sk->sk_prot->orphan_count);
sock_put(sk);
}
EXPORT_SYMBOL(inet_csk_destroy_sock);
@@ -825,7 +822,7 @@ void inet_csk_listen_stop(struct sock *sk)
sock_orphan(child);
- ub_inc_orphan_count(sk);
+ percpu_counter_inc(sk->sk_prot->orphan_count);
if (sk->sk_protocol == IPPROTO_TCP && tcp_rsk(req)->listener) {
BUG_ON(tcp_sk(child)->fastopen_rsk != req);
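Note on the net/ipv4/inet_connection_sock.c change above: orphan bookkeeping now goes straight to the stock per-protocol percpu counter instead of the ub_{inc,dec}_orphan_count() wrappers. A sketch of the accounting pattern the patch converges on (orphan_child() is a hypothetical name, not a function added by this diff):

	/* Detach a child socket and account it on the protocol-wide counter. */
	static void orphan_child(struct sock *child)
	{
		sock_orphan(child);
		percpu_counter_inc(child->sk_prot->orphan_count);
	}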
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c
index e2bda07..6d592f8 100644
--- a/net/ipv4/inet_timewait_sock.c
+++ b/net/ipv4/inet_timewait_sock.c
@@ -66,8 +66,6 @@ int inet_twsk_bind_unhash(struct inet_timewait_sock *tw,
return 1;
}
-#include <bc/sock_orphan.h>
-
/* Must be called with locally disabled BHs. */
static void __inet_twsk_kill(struct inet_timewait_sock *tw,
struct inet_hashinfo *hashinfo)
@@ -100,9 +98,6 @@ void inet_twsk_free(struct inet_timewait_sock *tw)
#ifdef SOCK_REFCNT_DEBUG
pr_debug("%s timewait_sock %p released\n", tw->tw_prot->name, tw);
#endif
-#ifdef CONFIG_BEANCOUNTERS
- put_beancounter(tw->tw_ub);
-#endif
release_net(twsk_net(tw));
kmem_cache_free(tw->tw_prot->twsk_prot->twsk_slab, tw);
module_put(owner);
@@ -176,11 +171,9 @@ EXPORT_SYMBOL_GPL(__inet_twsk_hashdance);
struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, const int state)
{
- struct inet_timewait_sock *tw;
-
- tw = kmem_cache_alloc(sk->sk_prot_creator->twsk_prot->twsk_slab,
- GFP_ATOMIC);
-
+ struct inet_timewait_sock *tw =
+ kmem_cache_alloc(sk->sk_prot_creator->twsk_prot->twsk_slab,
+ GFP_ATOMIC);
if (tw != NULL) {
const struct inet_sock *inet = inet_sk(sk);
@@ -211,9 +204,6 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, const int stat
atomic_set(&tw->tw_refcnt, 0);
inet_twsk_dead_node_init(tw);
__module_get(tw->tw_prot->owner);
-#ifdef CONFIG_BEANCOUNTERS
- tw->tw_ub = get_beancounter(get_exec_ub());
-#endif
}
return tw;
@@ -239,7 +229,6 @@ static int inet_twdr_do_twkill_work(struct inet_timewait_death_row *twdr,
rescan:
inet_twsk_for_each_inmate(tw, &twdr->cells[slot]) {
__inet_twsk_del_dead_node(tw);
- ub_timewait_dec(tw, twdr);
spin_unlock(&twdr->death_lock);
__inet_twsk_kill(tw, twdr->hashinfo);
#ifdef CONFIG_NET_NS
@@ -338,7 +327,6 @@ void inet_twsk_deschedule(struct inet_timewait_sock *tw,
{
spin_lock(&twdr->death_lock);
if (inet_twsk_del_dead_node(tw)) {
- ub_timewait_dec(tw, twdr);
inet_twsk_put(tw);
if (--twdr->tw_count == 0)
del_timer(&twdr->tw_timer);
@@ -384,10 +372,9 @@ void inet_twsk_schedule(struct inet_timewait_sock *tw,
spin_lock(&twdr->death_lock);
/* Unlink it, if it was scheduled */
- if (inet_twsk_del_dead_node(tw)) {
- ub_timewait_dec(tw, twdr);
+ if (inet_twsk_del_dead_node(tw))
twdr->tw_count--;
- } else
+ else
atomic_inc(&tw->tw_refcnt);
if (slot >= INET_TWDR_RECYCLE_SLOTS) {
@@ -423,7 +410,6 @@ void inet_twsk_schedule(struct inet_timewait_sock *tw,
hlist_add_head(&tw->tw_death_node, list);
- ub_timewait_inc(tw, twdr);
if (twdr->tw_count++ == 0)
mod_timer(&twdr->tw_timer, jiffies + twdr->period);
spin_unlock(&twdr->death_lock);
@@ -457,7 +443,6 @@ void inet_twdr_twcal_tick(unsigned long data)
&twdr->twcal_row[slot]) {
__inet_twsk_del_dead_node(tw);
__inet_twsk_kill(tw, twdr->hashinfo);
- ub_timewait_dec(tw, twdr);
#ifdef CONFIG_NET_NS
NET_INC_STATS_BH(twsk_net(tw), LINUX_MIB_TIMEWAITKILLED);
#endif
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 3a85b13..463bd12 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -55,7 +55,7 @@ static int sockstat_seq_show(struct seq_file *seq, void *v)
int orphans, sockets;
local_bh_disable();
- orphans = percpu_counter_sum_positive(&get_exec_ub()->ub_orphan_count);
+ orphans = percpu_counter_sum_positive(&tcp_orphan_count);
sockets = proto_sockets_allocated_sum_positive(&tcp_prot);
local_bh_enable();
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 33c8133..e641406 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -278,10 +278,6 @@
#include <net/netdma.h>
#include <net/sock.h>
-#include <bc/sock_orphan.h>
-#include <bc/net.h>
-#include <bc/tcp.h>
-
#include <asm/uaccess.h>
#include <asm/ioctls.h>
#include <net/busy_poll.h>
@@ -446,7 +442,6 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
unsigned int mask;
struct sock *sk = sock->sk;
const struct tcp_sock *tp = tcp_sk(sk);
- int check_send_space;
sock_poll_wait(file, sk_sleep(sk), wait);
if (sk->sk_state == TCP_LISTEN)
@@ -459,21 +454,6 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
mask = 0;
- check_send_space = 1;
-#ifdef CONFIG_BEANCOUNTERS
- if (!(sk->sk_shutdown & SEND_SHUTDOWN) && sock_has_ubc(sk)) {
- unsigned long size;
- size = MAX_TCP_HEADER + tp->mss_cache;
- if (size > SOCK_MIN_UBCSPACE)
- size = SOCK_MIN_UBCSPACE;
- size = skb_charge_size(size);
- if (ub_sock_makewres_tcp(sk, size)) {
- check_send_space = 0;
- ub_sock_sndqueueadd_tcp(sk, size);
- }
- }
-#endif
-
/*
* POLLHUP is certainly not done right. But poll() doesn't
* have a notion of HUP in just one direction, and for a
@@ -522,7 +502,7 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
if (tp->rcv_nxt - tp->copied_seq >= target)
mask |= POLLIN | POLLRDNORM;
- if (check_send_space && !(sk->sk_shutdown & SEND_SHUTDOWN)) {
+ if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) {
mask |= POLLOUT | POLLWRNORM;
} else { /* send SIGIO later */
@@ -893,23 +873,15 @@ static ssize_t do_tcp_sendpages(struct sock *sk, struct page *page, int offset,
struct sk_buff *skb = tcp_write_queue_tail(sk);
int copy, i;
bool can_coalesce;
- unsigned long chargesize = 0;
if (!tcp_send_head(sk) || (copy = size_goal - skb->len) <= 0) {
new_segment:
- chargesize = 0;
if (!sk_stream_memory_free(sk))
goto wait_for_sndbuf;
- chargesize = skb_charge_size(MAX_TCP_HEADER +
- tp->mss_cache);
- if (ub_sock_getwres_tcp(sk, chargesize) < 0)
- goto wait_for_ubspace;
skb = sk_stream_alloc_skb(sk, 0, sk->sk_allocation);
if (!skb)
goto wait_for_memory;
- ub_skb_set_charge(skb, sk, chargesize, UB_TCPSNDBUF);
- chargesize = 0;
skb_entail(sk, skb);
copy = size_goal;
@@ -966,14 +938,9 @@ new_segment:
wait_for_sndbuf:
set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
wait_for_memory:
- ub_sock_retwres_tcp(sk, chargesize,
- skb_charge_size(MAX_TCP_HEADER + tp->mss_cache));
- chargesize = 0;
-wait_for_ubspace:
tcp_push(sk, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH);
- err = __sk_stream_wait_memory(sk, &timeo, chargesize);
- if (err != 0)
+ if ((err = sk_stream_wait_memory(sk, &timeo)) != 0)
goto do_error;
mss_now = tcp_send_mss(sk, &size_goal, flags);
@@ -1008,8 +975,9 @@ int tcp_sendpage(struct sock *sk, struct page *page, int offset,
}
EXPORT_SYMBOL(tcp_sendpage);
-static inline int select_size(const struct sock *sk, bool sg, struct tcp_sock *tp)
+static inline int select_size(const struct sock *sk, bool sg)
{
+ const struct tcp_sock *tp = tcp_sk(sk);
int tmp = tp->mss_cache;
if (sg) {
@@ -1129,7 +1097,6 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
while (--iovlen >= 0) {
size_t seglen = iov->iov_len;
unsigned char __user *from = iov->iov_base;
- unsigned long chargesize = 0;
iov++;
if (unlikely(offset > 0)) { /* Skip bytes copied in SYN */
@@ -1154,27 +1121,18 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
}
if (copy <= 0) {
- unsigned long size;
new_segment:
/* Allocate new segment. If the interface is SG,
* allocate skb fitting to single page.
*/
- chargesize = 0;
if (!sk_stream_memory_free(sk))
goto wait_for_sndbuf;
- size = select_size(sk, sg, tp);
- chargesize = skb_charge_size(MAX_TCP_HEADER +
- size);
- if (ub_sock_getwres_tcp(sk, chargesize) < 0)
- goto wait_for_ubspace;
- skb = sk_stream_alloc_skb(sk, size,
+ skb = sk_stream_alloc_skb(sk,
+ select_size(sk, sg),
sk->sk_allocation);
if (!skb)
goto wait_for_memory;
- ub_skb_set_charge(skb, sk, chargesize,
- UB_TCPSNDBUF);
- chargesize = 0;
/*
* All packets are restored as if they have
@@ -1270,15 +1228,10 @@ new_segment:
wait_for_sndbuf:
set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
wait_for_memory:
- ub_sock_retwres_tcp(sk, chargesize,
- skb_charge_size(MAX_TCP_HEADER+tp->mss_cache));
- chargesize = 0;
-wait_for_ubspace:
if (copied && likely(!tp->repair))
tcp_push(sk, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH);
- err = __sk_stream_wait_memory(sk, &timeo, chargesize);
- if (err != 0)
+ if ((err = sk_stream_wait_memory(sk, &timeo)) != 0)
goto do_error;
mss_now = tcp_send_mss(sk, &size_goal, flags);
@@ -2127,20 +2080,12 @@ EXPORT_SYMBOL(tcp_shutdown);
bool tcp_check_oom(struct sock *sk, int shift)
{
bool too_many_orphans, out_of_socket_memory;
- int orphans = ub_get_orphan_count(sk);
- too_many_orphans = ub_too_many_orphans(sk, orphans);
+ too_many_orphans = tcp_too_many_orphans(sk, shift);
out_of_socket_memory = tcp_out_of_memory(sk);
- if (too_many_orphans) {
- const char *ubid = "0";
-#ifdef CONFIG_BEANCOUNTERS
- if (sock_has_ubc(sk))
- ubid = sock_bc(sk)->ub->ub_name;
-#endif
- net_info_ratelimited("too many orphaned sockets (%d in CT%s)\n",
- orphans, ubid);
- }
+ if (too_many_orphans)
+ net_info_ratelimited("too many orphaned sockets\n");
if (out_of_socket_memory)
net_info_ratelimited("out of memory -- consider tuning tcp_mem\n");
return too_many_orphans || out_of_socket_memory;
@@ -2250,7 +2195,7 @@ adjudge_to_death:
bh_lock_sock(sk);
WARN_ON(sock_owned_by_user(sk));
- ub_inc_orphan_count(sk);
+ percpu_counter_inc(sk->sk_prot->orphan_count);
/* Have we already been destroyed by a softirq or backlog? */
if (state != TCP_CLOSE && sk->sk_state == TCP_CLOSE)
@@ -3236,7 +3181,6 @@ void __init tcp_init(void)
percpu_counter_init(&tcp_sockets_allocated, 0);
percpu_counter_init(&tcp_orphan_count, 0);
- percpu_counter_init(&get_ub0()->ub_orphan_count, 0);
tcp_hashinfo.bind_bucket_cachep =
kmem_cache_create("tcp_bind_bucket",
sizeof(struct inet_bind_bucket), 0,
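Note on the tcp_check_oom() change above: the per-beancounter orphan math is replaced by the stock tcp_too_many_orphans() helper, which reads the cheap percpu approximation first and only takes the exact sum when the limit looks exceeded. For reference, this is the shape of the mainline helper on this kernel base (quoted from memory; treat it as a sketch rather than the exact source):

	static inline bool tcp_too_many_orphans(struct sock *sk, int shift)
	{
		struct percpu_counter *ocp = sk->sk_prot->orphan_count;
		int orphans = percpu_counter_read_positive(ocp);

		if (orphans << shift > sysctl_tcp_max_orphans) {
			orphans = percpu_counter_sum_positive(ocp);
			if (orphans << shift > sysctl_tcp_max_orphans)
				return true;
		}
		return false;
	}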
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 4e1ee3f..e0a231e 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -75,8 +75,6 @@
#include <asm/unaligned.h>
#include <net/netdma.h>
-#include <bc/tcp.h>
-
int sysctl_tcp_timestamps __read_mostly = 1;
int sysctl_tcp_window_scaling __read_mostly = 1;
int sysctl_tcp_sack __read_mostly = 1;
@@ -406,8 +404,6 @@ void tcp_init_buffer_space(struct sock *sk)
tp->rcv_ssthresh = min(tp->rcv_ssthresh, tp->window_clamp);
tp->snd_cwnd_stamp = tcp_time_stamp;
-
- ub_tcp_update_maxadvmss(sk);
}
/* 5. Recalculate window clamp after socket hit its memory bounds. */
@@ -4548,10 +4544,6 @@ restart:
nskb = alloc_skb(copy + header, GFP_ATOMIC);
if (!nskb)
return;
- if (ub_tcprcvbuf_charge_forced(skb->sk, nskb) < 0) {
- kfree_skb(nskb);
- return;
- }
skb_set_mac_header(nskb, skb_mac_header(skb) - skb->head);
skb_set_network_header(nskb, (skb_network_header(skb) -
@@ -5255,11 +5247,6 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
if ((int)skb->truesize > sk->sk_forward_alloc)
goto step5;
- /* This is OK not to try to free memory here.
- * Do this below on slow path. Den */
- if (ub_tcprcvbuf_charge(sk, skb) < 0)
- goto step5;
-
/* Predicted packet is in window by definition.
* seq == rcv_nxt and rcv_wup <= rcv_nxt.
* Hence, check seq<=rcv_wup reduces to:
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index a8ef57a..0ada1d5 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -77,8 +77,6 @@
#include <net/tcp_memcontrol.h>
#include <net/busy_poll.h>
-#include <bc/tcp.h>
-
#include <linux/inet.h>
#include <linux/ipv6.h>
#include <linux/stddef.h>
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index d8b99a8..17b1be4 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -26,7 +26,6 @@
#include <net/tcp.h>
#include <net/inet_common.h>
#include <net/xfrm.h>
-#include <bc/sock_orphan.h>
int sysctl_tcp_syncookies __read_mostly = 1;
EXPORT_SYMBOL(sysctl_tcp_syncookies);
@@ -52,7 +51,6 @@ struct inet_timewait_death_row tcp_death_row = {
.twcal_hand = -1,
.twcal_timer = TIMER_INITIALIZER(inet_twdr_twcal_tick, 0,
(unsigned long)&tcp_death_row),
- .ub_managed = 1,
};
EXPORT_SYMBOL_GPL(tcp_death_row);
@@ -280,8 +278,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
if (tcp_death_row.sysctl_tw_recycle && tp->rx_opt.ts_recent_stamp)
recycle_ok = tcp_remember_stamp(sk);
- if (tcp_death_row.tw_count < tcp_death_row.sysctl_max_tw_buckets &&
- ub_timewait_check(sk, &tcp_death_row))
+ if (tcp_death_row.tw_count < tcp_death_row.sysctl_max_tw_buckets)
tw = inet_twsk_alloc(sk, state);
if (tw != NULL) {
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 983f919..13d440b 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -42,9 +42,6 @@
#include <linux/gfp.h>
#include <linux/module.h>
-#include <bc/net.h>
-#include <bc/tcp.h>
-
/* People can turn this off for buggy TCP's found in printers etc. */
int sysctl_tcp_retrans_collapse __read_mostly = 1;
@@ -1063,19 +1060,10 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len,
if (skb_unclone(skb, GFP_ATOMIC))
return -ENOMEM;
- if (skb_cloned(skb)) {
- ub_skb_uncharge(skb);
- ub_tcpsndbuf_charge_forced(sk, skb);
- }
-
/* Get a new skb... force flag on. */
buff = sk_stream_alloc_skb(sk, nsize, GFP_ATOMIC);
if (buff == NULL)
return -ENOMEM; /* We'll just try again later. */
- if (ub_tcpsndbuf_charge(sk, buff) < 0) {
- kfree_skb(buff);
- return -ENOMEM;
- }
sk->sk_wmem_queued += buff->truesize;
sk_mem_charge(sk, buff->truesize);
@@ -1564,11 +1552,6 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
if (unlikely(buff == NULL))
return -ENOMEM;
- if (ub_tcpsndbuf_charge(sk, buff) < 0) {
- kfree_skb(buff);
- return -ENOMEM;
- }
-
sk->sk_wmem_queued += buff->truesize;
sk_mem_charge(sk, buff->truesize);
buff->truesize += nlen;
@@ -2593,7 +2576,6 @@ void tcp_send_fin(struct sock *sk)
break;
yield();
}
- ub_tcpsndbuf_charge_forced(sk, skb);
/* Reserve space for headers and prepare control bits. */
skb_reserve(skb, MAX_TCP_HEADER);
@@ -2653,10 +2635,6 @@ int tcp_send_synack(struct sock *sk)
struct sk_buff *nskb = skb_copy(skb, GFP_ATOMIC);
if (nskb == NULL)
return -ENOMEM;
- if (ub_tcpsndbuf_charge(sk, nskb) < 0) {
- kfree_skb(nskb);
- return -ENOMEM;
- }
tcp_unlink_write_queue(skb, sk);
skb_header_release(nskb);
__tcp_add_write_queue_head(sk, nskb);
@@ -2969,10 +2947,6 @@ int tcp_connect(struct sock *sk)
buff = alloc_skb_fclone(MAX_TCP_HEADER + 15, sk->sk_allocation);
if (unlikely(buff == NULL))
return -ENOBUFS;
- if (ub_tcpsndbuf_charge(sk, buff) < 0) {
- kfree_skb(buff);
- return -ENOBUFS;
- }
/* Reserve space for headers. */
skb_reserve(buff, MAX_TCP_HEADER);
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 588218a..848bd50 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -63,7 +63,6 @@
#ifdef CONFIG_IPV6_MIP6
#include <net/mip6.h>
#endif
-#include <bc/net.h>
#include <asm/uaccess.h>
#include <linux/mroute6.h>
@@ -186,13 +185,6 @@ lookup_protocol:
if (sk == NULL)
goto out;
- err = -ENOBUFS;
- if (ub_sock_charge(sk, PF_INET6, sock->type))
- goto out_sk_free;
- /* if charge was successful, sock_init_data() MUST be called to
- * set sk->sk_type. otherwise sk will be uncharged to wrong resource
- */
-
sock_init_data(sock, sk);
err = 0;
@@ -268,9 +260,6 @@ out:
out_rcu_unlock:
rcu_read_unlock();
goto out;
-out_sk_free:
- sk_free(sk);
- return err;
}
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 28e35ea..2727bb3 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -65,8 +65,6 @@
#include <net/tcp_memcontrol.h>
#include <net/busy_poll.h>
-#include <bc/tcp.h>
-
#include <asm/uaccess.h>
#include <linux/proc_fs.h>
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 017c755..d0b2af5 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -54,7 +54,6 @@
#include <net/netfilter/nf_nat_helper.h>
#include <net/sock.h>
-#include <bc/sock.h>
#define NF_CONNTRACK_VERSION "0.5.0"
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 65962af..1f93d7e 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -56,7 +56,6 @@
#include <net/sock.h>
#include <bc/beancounter.h>
-#include <bc/sock.h>
MODULE_LICENSE("GPL");
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index 3fa4086..a6a6942 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -27,6 +27,7 @@
#include <linux/slab.h>
#include <linux/audit.h>
#include <net/net_namespace.h>
+#include <bc/beancounter.h>
#include <linux/netfilter/x_tables.h>
#include <linux/netfilter_arp.h>
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index a54e578..94d635f 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -65,9 +65,6 @@
#include <net/scm.h>
#include <net/netlink.h>
-#include <bc/beancounter.h>
-#include <bc/net.h>
-
#include "af_netlink.h"
struct listeners {
@@ -1178,8 +1175,6 @@ static int __netlink_create(struct net *net, struct socket *sock,
sk = sk_alloc(net, PF_NETLINK, GFP_KERNEL, &netlink_proto);
if (!sk)
return -ENOMEM;
- if (ub_other_sock_charge(sk))
- goto out_free;
sock_init_data(sock, sk);
@@ -1198,10 +1193,6 @@ static int __netlink_create(struct net *net, struct socket *sock,
sk->sk_destruct = netlink_sock_destruct;
sk->sk_protocol = protocol;
return 0;
-
-out_free:
- sk_free(sk);
- return -ENOMEM;
}
static int netlink_create(struct net *net, struct socket *sock, int protocol,
@@ -1605,21 +1596,13 @@ int netlink_attachskb(struct sock *sk, struct sk_buff *skb,
long *timeo, struct sock *ssk)
{
struct netlink_sock *nlk;
- unsigned long chargesize;
- int no_ubc;
nlk = nlk_sk(sk);
- chargesize = skb_charge_fullsize(skb);
- no_ubc = ub_sock_getwres_other(sk, chargesize);
- if ((no_ubc || atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
+ if ((atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
test_bit(NETLINK_CONGESTED, &nlk->state)) &&
!netlink_skb_is_mmaped(skb)) {
DECLARE_WAITQUEUE(wait, current);
-
- if (!no_ubc)
- ub_sock_retwres_other(sk, chargesize,
- SOCK_MIN_UBCSPACE_CH);
if (!*timeo) {
if (!ssk || netlink_is_kernel(ssk))
netlink_overrun(sk);
@@ -1631,20 +1614,13 @@ int netlink_attachskb(struct sock *sk, struct sk_buff *skb,
__set_current_state(TASK_INTERRUPTIBLE);
add_wait_queue(&nlk->wait, &wait);
- /* this if can't be moved upper because ub_sock_snd_queue_add()
- * may change task state to TASK_RUNNING */
- if (no_ubc)
- ub_sock_sndqueueadd_other(sk, chargesize);
-
if ((atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
- test_bit(NETLINK_CONGESTED, &nlk->state) || no_ubc) &&
+ test_bit(NETLINK_CONGESTED, &nlk->state)) &&
!sock_flag(sk, SOCK_DEAD))
*timeo = schedule_timeout(*timeo);
__set_current_state(TASK_RUNNING);
remove_wait_queue(&nlk->wait, &wait);
- if (no_ubc)
- ub_sock_sndqueuedel(sk);
sock_put(sk);
if (signal_pending(current)) {
@@ -1654,7 +1630,6 @@ int netlink_attachskb(struct sock *sk, struct sk_buff *skb,
return 1;
}
netlink_skb_set_owner_r(skb, sk);
- ub_skb_set_charge(skb, sk, chargesize, UB_OTHERSOCKBUF);
return 0;
}
@@ -2626,10 +2601,6 @@ static int netlink_dump(struct sock *sk)
skb = netlink_alloc_skb(sk, alloc_size, nlk->portid, GFP_KERNEL);
if (!skb)
goto errout_skb;
- if (ub_nlrcvbuf_charge(skb, sk) < 0) {
- err = -EACCES;
- goto errout_skb;
- }
netlink_skb_set_owner_r(skb, sk);
len = cb->dump(skb, cb);
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 58688c2..e8b5a0d 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -89,8 +89,6 @@
#include <linux/errqueue.h>
#include <linux/net_tstamp.h>
-#include <bc/net.h>
-
#ifdef CONFIG_INET
#include <net/inet_common.h>
#endif
@@ -1640,8 +1638,6 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev,
if (!net_eq(dev_net(dev), sock_net(sk)))
goto drop;
- skb_orphan(skb);
-
skb->dev = dev;
if (dev->header_ops) {
@@ -1704,9 +1700,6 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev,
if (pskb_trim(skb, snaplen))
goto drop_n_acct;
- if (ub_sockrcvbuf_charge(sk, skb))
- goto drop_n_acct;
-
skb_set_owner_r(skb, sk);
skb->dev = NULL;
skb_dst_drop(skb);
@@ -1763,8 +1756,6 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
if (!net_eq(dev_net(dev), sock_net(sk)))
goto drop;
- skb_orphan(skb);
-
if (dev->header_ops) {
if (sk->sk_type != SOCK_DGRAM)
skb_push(skb, skb->data - skb_mac_header(skb));
@@ -1813,13 +1804,6 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
snaplen = 0;
}
}
-
- if (copy_skb &&
- ub_sockrcvbuf_charge(sk, copy_skb)) {
- spin_lock(&sk->sk_receive_queue.lock);
- goto ring_is_full;
- }
-
spin_lock(&sk->sk_receive_queue.lock);
h.raw = packet_current_rx_frame(po, skb,
TP_STATUS_KERNEL, (macoff+snaplen));
@@ -2634,8 +2618,6 @@ static int packet_create(struct net *net, struct socket *sock, int protocol,
sk = sk_alloc(net, PF_PACKET, GFP_KERNEL, &packet_proto);
if (sk == NULL)
goto out;
- if (ub_other_sock_charge(sk))
- goto out_free;
sock->ops = &packet_ops;
if (sock->type == SOCK_PACKET)
@@ -2679,9 +2661,6 @@ static int packet_create(struct net *net, struct socket *sock, int protocol,
preempt_enable();
return 0;
-
-out_free:
- sk_free(sk);
out:
return err;
}
diff --git a/net/socket.c b/net/socket.c
index 94ce25f..df73b9d 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -587,9 +587,6 @@ const struct file_operations bad_sock_fops = {
void sock_release(struct socket *sock)
{
- if (sock->sk)
- ub_sock_sndqueuedel(sock->sk);
-
if (sock->ops) {
struct module *owner = sock->ops->owner;
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index b625bbb..6c66e8d 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -116,9 +116,6 @@
#include <linux/security.h>
#include <linux/freezer.h>
-#include <bc/net.h>
-#include <bc/beancounter.h>
-
struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE];
EXPORT_SYMBOL_GPL(unix_socket_table);
DEFINE_SPINLOCK(unix_table_lock);
@@ -635,8 +632,6 @@ static struct sock *unix_create1(struct net *net, struct socket *sock)
sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto);
if (!sk)
goto out;
- if (ub_other_sock_charge(sk))
- goto out_sk_free;
sock_init_data(sock, sk);
lockdep_set_class(&sk->sk_receive_queue.lock,
@@ -663,10 +658,6 @@ out:
local_bh_enable();
}
return sk;
-out_sk_free:
- sk_free(sk);
- atomic_long_dec(&unix_nr_socks);
- return NULL;
}
static int unix_create(struct net *net, struct socket *sock, int protocol,
@@ -1073,7 +1064,6 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
int st;
int err;
long timeo;
- unsigned long chargesize;
err = unix_mkname(sunaddr, addr_len, &hash);
if (err < 0)
@@ -1102,10 +1092,6 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
if (skb == NULL)
goto out;
- chargesize = skb_charge_fullsize(skb);
- if (ub_sock_getwres_other(newsk, chargesize) < 0)
- goto out;
- ub_skb_set_charge(skb, newsk, chargesize, UB_OTHERSOCKBUF);
restart:
/* Find listening sock. */
@@ -1666,16 +1652,6 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
size = len-sent;
- if (msg->msg_flags & MSG_DONTWAIT)
- ub_sock_makewres_other(sk, skb_charge_size(size));
- if (sock_bc(sk) != NULL &&
- sock_bc(sk)->poll_reserv >=
- SOCK_MIN_UBCSPACE &&
- skb_charge_size(size) >
- sock_bc(sk)->poll_reserv)
- size = skb_charge_datalen(sock_bc(sk)->poll_reserv);
-
-
/* Keep two messages in the pipe so it schedules better */
if (size > ((sk->sk_sndbuf >> 1) - 64))
size = (sk->sk_sndbuf >> 1) - 64;
@@ -2183,7 +2159,6 @@ static unsigned int unix_poll(struct file *file, struct socket *sock, poll_table
{
struct sock *sk = sock->sk;
unsigned int mask;
- int no_ub_res;
sock_poll_wait(file, sk_sleep(sk), wait);
mask = 0;
@@ -2196,10 +2171,6 @@ static unsigned int unix_poll(struct file *file, struct socket *sock, poll_table
if (sk->sk_shutdown & RCV_SHUTDOWN)
mask |= POLLRDHUP | POLLIN | POLLRDNORM;
- no_ub_res = ub_sock_makewres_other(sk, SOCK_MIN_UBCSPACE_CH);
- if (no_ub_res)
- ub_sock_sndqueueadd_other(sk, SOCK_MIN_UBCSPACE_CH);
-
/* readable? */
if (!skb_queue_empty(&sk->sk_receive_queue))
mask |= POLLIN | POLLRDNORM;
@@ -2213,7 +2184,7 @@ static unsigned int unix_poll(struct file *file, struct socket *sock, poll_table
* we set writable also when the other side has shut down the
* connection. This prevents stuck sockets.
*/
- if (!no_ub_res && unix_writable(sk))
+ if (unix_writable(sk))
mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
return mask;