From: shemminger Date: Wed, 22 Jun 2005 18:27:49 +0000 (+0000) Subject: Netem support for reorder X-Git-Url: https://rtime.felk.cvut.cz/gitweb/lisovros/iproute2_canprio.git/commitdiff_plain/ea8fc1047d2b88ae1d123e3f79dbffbc2a3fa70c Netem support for reorder Update include files and add support for TCP_CONG --- diff --git a/ChangeLog b/ChangeLog index eb827df..5b31d2c 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,8 @@ +2005-06-22 Stephen Hemminger + + * Update include files to 2.6.12 + * Add ss support for TCP_CONG + 2005-06-13 Steven Whitehouse * Decnet doc's update diff --git a/include/linux/pkt_sched.h b/include/linux/pkt_sched.h index 73d84c0..1d9da36 100644 --- a/include/linux/pkt_sched.h +++ b/include/linux/pkt_sched.h @@ -427,6 +427,7 @@ enum TCA_NETEM_UNSPEC, TCA_NETEM_CORR, TCA_NETEM_DELAY_DIST, + TCA_NETEM_REORDER, __TCA_NETEM_MAX, }; @@ -437,7 +438,7 @@ struct tc_netem_qopt __u32 latency; /* added delay (us) */ __u32 limit; /* fifo limit (packets) */ __u32 loss; /* random packet loss (0=none ~0=100%) */ - __u32 gap; /* re-ordering gap (0 for delay all) */ + __u32 gap; /* re-ordering gap (0 for none) */ __u32 duplicate; /* random packet dup (0=none ~0=100%) */ __u32 jitter; /* random jitter in latency (us) */ }; @@ -449,6 +450,12 @@ struct tc_netem_corr __u32 dup_corr; /* duplicate correlation */ }; +struct tc_netem_reorder +{ + __u32 probability; + __u32 correlation; +}; + #define NETEM_DIST_SCALE 8192 #endif diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index fdc8b74..afc47fd 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -89,10 +89,21 @@ enum { RTM_GETANYCAST = 62, #define RTM_GETANYCAST RTM_GETANYCAST - RTM_MAX, -#define RTM_MAX RTM_MAX + RTM_NEWNEIGHTBL = 64, +#define RTM_NEWNEIGHTBL RTM_NEWNEIGHTBL + RTM_GETNEIGHTBL = 66, +#define RTM_GETNEIGHTBL RTM_GETNEIGHTBL + RTM_SETNEIGHTBL, +#define RTM_SETNEIGHTBL RTM_SETNEIGHTBL + + __RTM_MAX, +#define RTM_MAX (((__RTM_MAX + 3) & ~3) - 1) }; +#define 
RTM_NR_MSGTYPES (RTM_MAX + 1 - RTM_BASE) +#define RTM_NR_FAMILIES (RTM_NR_MSGTYPES >> 2) +#define RTM_FAM(cmd) (((cmd) - RTM_BASE) >> 2) + /* Generic structure for encapsulation of optional route information. It is reminiscent of sockaddr, but with sa_family replaced @@ -398,19 +409,6 @@ enum #define IFA_MAX (__IFA_MAX - 1) -/* - * Quirk for IPv4 address deletion to allow exact deletion of equal - * addresses varying only in prefix length. A explicit exact comparison - * of the prefix length will only be done if IFA_PREFIX_EXACT_DEL is - * ORed to ifa_prefixlen. - * - * Note: This special treatment is only understood while deleting - * addresses and will lead to unexpected behaviour if used - * otherwise. - */ -#define IFA_PREFIX_EXACT_DEL 0x40 -#define IFA_REAL_DEL_PREFIX(l) ((l) & 0x3f) - /* ifa_flags */ #define IFA_F_SECONDARY 0x01 @@ -502,6 +500,106 @@ struct nda_cacheinfo __u32 ndm_refcnt; }; + +/***************************************************************** + * Neighbour tables specific messages. + * + * To retrieve the neighbour tables send RTM_GETNEIGHTBL with the + * NLM_F_DUMP flag set. Every neighbour table configuration is + * spread over multiple messages to avoid running into message + * size limits on systems with many interfaces. The first message + * in the sequence transports all not device specific data such as + * statistics, configuration, and the default parameter set. + * This message is followed by 0..n messages carrying device + * specific parameter sets. + * Although the ordering should be sufficient, NDTA_NAME can be + * used to identify sequences. The initial message can be identified + * by checking for NDTA_CONFIG. The device specific messages do + * not contain this TLV but have NDTPA_IFINDEX set to the + * corresponding interface index. + * + * To change neighbour table attributes, send RTM_SETNEIGHTBL + * with NDTA_NAME set. 
Changeable attributes include NDTA_THRESH[1-3], + * NDTA_GC_INTERVAL, and all TLVs in NDTA_PARMS unless marked + * otherwise. Device specific parameter sets can be changed by + * setting NDTPA_IFINDEX to the interface index of the corresponding + * device. + ****/ + +struct ndt_stats +{ + __u64 ndts_allocs; + __u64 ndts_destroys; + __u64 ndts_hash_grows; + __u64 ndts_res_failed; + __u64 ndts_lookups; + __u64 ndts_hits; + __u64 ndts_rcv_probes_mcast; + __u64 ndts_rcv_probes_ucast; + __u64 ndts_periodic_gc_runs; + __u64 ndts_forced_gc_runs; +}; + +enum { + NDTPA_UNSPEC, + NDTPA_IFINDEX, /* u32, unchangeable */ + NDTPA_REFCNT, /* u32, read-only */ + NDTPA_REACHABLE_TIME, /* u64, read-only, msecs */ + NDTPA_BASE_REACHABLE_TIME, /* u64, msecs */ + NDTPA_RETRANS_TIME, /* u64, msecs */ + NDTPA_GC_STALETIME, /* u64, msecs */ + NDTPA_DELAY_PROBE_TIME, /* u64, msecs */ + NDTPA_QUEUE_LEN, /* u32 */ + NDTPA_APP_PROBES, /* u32 */ + NDTPA_UCAST_PROBES, /* u32 */ + NDTPA_MCAST_PROBES, /* u32 */ + NDTPA_ANYCAST_DELAY, /* u64, msecs */ + NDTPA_PROXY_DELAY, /* u64, msecs */ + NDTPA_PROXY_QLEN, /* u32 */ + NDTPA_LOCKTIME, /* u64, msecs */ + __NDTPA_MAX +}; +#define NDTPA_MAX (__NDTPA_MAX - 1) + +struct ndtmsg +{ + __u8 ndtm_family; + __u8 ndtm_pad1; + __u16 ndtm_pad2; +}; + +struct ndt_config +{ + __u16 ndtc_key_len; + __u16 ndtc_entry_size; + __u32 ndtc_entries; + __u32 ndtc_last_flush; /* delta to now in msecs */ + __u32 ndtc_last_rand; /* delta to now in msecs */ + __u32 ndtc_hash_rnd; + __u32 ndtc_hash_mask; + __u32 ndtc_hash_chain_gc; + __u32 ndtc_proxy_qlen; +}; + +enum { + NDTA_UNSPEC, + NDTA_NAME, /* char *, unchangeable */ + NDTA_THRESH1, /* u32 */ + NDTA_THRESH2, /* u32 */ + NDTA_THRESH3, /* u32 */ + NDTA_CONFIG, /* struct ndt_config, read-only */ + NDTA_PARMS, /* nested TLV NDTPA_* */ + NDTA_STATS, /* struct ndt_stats, read-only */ + NDTA_GC_INTERVAL, /* u64, msecs */ + __NDTA_MAX +}; +#define NDTA_MAX (__NDTA_MAX - 1) + +#define NDTA_RTA(r) ((struct rtattr*)(((char*)(r)) + 
\ + NLMSG_ALIGN(sizeof(struct ndtmsg)))) +#define NDTA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct ndtmsg)) + + /**** * General form of address family dependent message. ****/ diff --git a/include/linux/tcp_diag.h b/include/linux/tcp_diag.h index ceee962..7a59967 100644 --- a/include/linux/tcp_diag.h +++ b/include/linux/tcp_diag.h @@ -99,9 +99,10 @@ enum TCPDIAG_MEMINFO, TCPDIAG_INFO, TCPDIAG_VEGASINFO, + TCPDIAG_CONG, }; -#define TCPDIAG_MAX TCPDIAG_VEGASINFO +#define TCPDIAG_MAX TCPDIAG_CONG /* TCPDIAG_MEM */ @@ -123,5 +124,4 @@ struct tcpvegas_info { __u32 tcpv_minrtt; }; - #endif /* _TCP_DIAG_H_ */ diff --git a/include/linux/xfrm.h b/include/linux/xfrm.h index f0df02a..d68391a 100644 --- a/include/linux/xfrm.h +++ b/include/linux/xfrm.h @@ -140,8 +140,11 @@ enum { XFRM_MSG_FLUSHPOLICY, #define XFRM_MSG_FLUSHPOLICY XFRM_MSG_FLUSHPOLICY - XFRM_MSG_MAX + __XFRM_MSG_MAX }; +#define XFRM_MSG_MAX (__XFRM_MSG_MAX - 1) + +#define XFRM_NR_MSGTYPES (XFRM_MSG_MAX + 1 - XFRM_MSG_BASE) struct xfrm_user_tmpl { struct xfrm_id id; @@ -171,6 +174,8 @@ enum xfrm_attr_type_t { XFRMA_ALG_COMP, /* struct xfrm_algo */ XFRMA_ENCAP, /* struct xfrm_algo + struct xfrm_encap_tmpl */ XFRMA_TMPL, /* 1 or more struct xfrm_user_tmpl */ + XFRMA_SA, + XFRMA_POLICY, __XFRMA_MAX #define XFRMA_MAX (__XFRMA_MAX - 1) @@ -254,5 +259,7 @@ struct xfrm_usersa_flush { #define XFRMGRP_ACQUIRE 1 #define XFRMGRP_EXPIRE 2 +#define XFRMGRP_SA 4 +#define XFRMGRP_POLICY 8 #endif /* _LINUX_XFRM_H */ diff --git a/misc/ss.c b/misc/ss.c index 668a5bf..549bcc3 100644 --- a/misc/ss.c +++ b/misc/ss.c @@ -1338,6 +1338,9 @@ static void tcp_show_info(const struct nlmsghdr *nlh, struct tcpdiagmsg *r) if (info->tcpi_options & TCPI_OPT_ECN) printf(" ecn"); } + if (tb[TCPDIAG_CONG]) + printf("%s", (char *) RTA_DATA(tb[TCPDIAG_CONG])); + if (info->tcpi_options & TCPI_OPT_WSCALE) printf(" wscale:%d,%d", info->tcpi_snd_wscale, info->tcpi_rcv_wscale); @@ -1358,12 +1361,9 @@ static void tcp_show_info(const struct nlmsghdr *nlh, 
struct tcpdiagmsg *r) const struct tcpvegas_info *vinfo = RTA_DATA(tb[TCPDIAG_VEGASINFO]); - if (vinfo->tcpv_enabled) - printf(" vegas"); - - if (vinfo->tcpv_rtt && - vinfo->tcpv_rtt != 0x7fffffff) - rtt = vinfo->tcpv_rtt; + if (vinfo->tcpv_enabled && + vinfo->tcpv_rtt && vinfo->tcpv_rtt != 0x7fffffff) + rtt = vinfo->tcpv_rtt; } if (rtt > 0 && info->tcpi_snd_mss && info->tcpi_snd_cwnd) { @@ -1442,7 +1442,6 @@ int tcp_show_sock(struct nlmsghdr *nlh, struct filter *f) printf("\n"); return 0; - } int tcp_show_netlink(struct filter *f, FILE *dump_fp) @@ -1480,9 +1479,13 @@ int tcp_show_netlink(struct filter *f, FILE *dump_fp) if (show_tcpinfo) { req.r.tcpdiag_ext |= (1<<(TCPDIAG_INFO-1)); req.r.tcpdiag_ext |= (1<<(TCPDIAG_VEGASINFO-1)); + req.r.tcpdiag_ext |= (1<<(TCPDIAG_CONG-1)); } - iov[0] = (struct iovec){ &req, sizeof(req) }; + iov[0] = (struct iovec){ + .iov_base = &req, + .iov_len = sizeof(req) + }; if (f->f) { bclen = ssfilter_bytecompile(f->f, &bc); rta.rta_type = TCPDIAG_REQ_BYTECODE; @@ -1493,17 +1496,19 @@ int tcp_show_netlink(struct filter *f, FILE *dump_fp) } msg = (struct msghdr) { - (void*)&nladdr, sizeof(nladdr), - iov, f->f ? 3 : 1, - NULL, 0, - 0 + .msg_name = (void*)&nladdr, + .msg_namelen = sizeof(nladdr), + .msg_iov = iov, + .msg_iovlen = f->f ? 3 : 1, }; if (sendmsg(fd, &msg, 0) < 0) return -1; - - iov[0] = (struct iovec){ buf, sizeof(buf) }; + iov[0] = (struct iovec){ + .iov_base = buf, + .iov_len = sizeof(buf) + }; while (1) { int status; diff --git a/tc/q_netem.c b/tc/q_netem.c index f696cc3..df93add 100644 --- a/tc/q_netem.c +++ b/tc/q_netem.c @@ -29,11 +29,11 @@ static void explain(void) { fprintf(stderr, "Usage: ... 
netem [ limit PACKETS ] \n" \ -" [ delay TIME [ JITTER [CORRELATION]]]\n" \ +" [ delay TIME [ JITTER [CORRELATION]]]\n" \ +" [ distribution {uniform|normal|pareto|paretonormal} ]\n" \ " [ drop PERCENT [CORRELATION]] \n" \ " [ duplicate PERCENT [CORRELATION]]\n" \ -" [ distribution {uniform|normal|pareto|paretonormal} ]\n" \ -" [ gap PACKETS ]\n"); +" [ reorder PRECENT [CORRELATION] [ gap DISTANCE ]]\n"); } static void explain1(const char *arg) @@ -127,11 +127,13 @@ static int netem_parse_opt(struct qdisc_util *qu, int argc, char **argv, struct rtattr *tail; struct tc_netem_qopt opt; struct tc_netem_corr cor; + struct tc_netem_reorder reorder; __s16 dist_data[MAXDIST]; memset(&opt, 0, sizeof(opt)); opt.limit = 1000; memset(&cor, 0, sizeof(cor)); + memset(&reorder, 0, sizeof(reorder)); while (argc > 0) { if (matches(*argv, "limit") == 0) { @@ -178,6 +180,19 @@ static int netem_parse_opt(struct qdisc_util *qu, int argc, char **argv, return -1; } } + } else if (matches(*argv, "reorder") == 0) { + NEXT_ARG(); + if (get_percent(&reorder.probability, *argv)) { + explain1("reorder"); + return -1; + } + if (NEXT_IS_NUMBER()) { + NEXT_ARG(); + if (get_percent(&reorder.correlation, *argv)) { + explain1("reorder"); + return -1; + } + } } else if (matches(*argv, "gap") == 0) { NEXT_ARG(); if (get_u32(&opt.gap, *argv, 0)) { @@ -215,8 +230,27 @@ static int netem_parse_opt(struct qdisc_util *qu, int argc, char **argv, tail = NLMSG_TAIL(n); + if (reorder.probability) { + if (opt.latency == 0) { + fprintf(stderr, "reordering not possible without specifying some delay\n"); + } + if (opt.gap == 0) + opt.gap = 1; + } else if (opt.gap > 0) { + fprintf(stderr, "gap specified without reorder probability\n"); + explain(); + return -1; + } + + if (dist_size > 0 && (opt.latency == 0 || opt.jitter == 0)) { + fprintf(stderr, "distribution specified but no latency and jitter values\n"); + explain(); + return -1; + } + addattr_l(n, 1024, TCA_OPTIONS, &opt, sizeof(opt)); addattr_l(n, 1024, 
TCA_NETEM_CORR, &cor, sizeof(cor)); + addattr_l(n, 1024, TCA_NETEM_REORDER, &reorder, sizeof(reorder)); if (dist_size > 0) { addattr_l(n, 32768, TCA_NETEM_DELAY_DIST, @@ -229,6 +263,7 @@ static int netem_parse_opt(struct qdisc_util *qu, int argc, char **argv, static int netem_print_opt(struct qdisc_util *qu, FILE *f, struct rtattr *opt) { const struct tc_netem_corr *cor = NULL; + const struct tc_netem_reorder *reorder = NULL; struct tc_netem_qopt qopt; int len = RTA_PAYLOAD(opt) - sizeof(qopt); SPRINT_BUF(b1); @@ -252,6 +287,11 @@ static int netem_print_opt(struct qdisc_util *qu, FILE *f, struct rtattr *opt) return -1; cor = RTA_DATA(tb[TCA_NETEM_CORR]); } + if (tb[TCA_NETEM_REORDER]) { + if (RTA_PAYLOAD(tb[TCA_NETEM_REORDER]) < sizeof(*reorder)) + return -1; + reorder = RTA_DATA(tb[TCA_NETEM_REORDER]); + } } fprintf(f, "limit %d", qopt.limit); @@ -278,6 +318,14 @@ static int netem_print_opt(struct qdisc_util *qu, FILE *f, struct rtattr *opt) if (cor && cor->dup_corr) fprintf(f, " %s", sprint_percent(cor->dup_corr, b1)); } + + if (reorder && reorder->probability) { + fprintf(f, " reorder %s", + sprint_percent(reorder->probability, b1)); + if (reorder->correlation) + fprintf(f, " %s", + sprint_percent(reorder->correlation, b1)); + } if (qopt.gap) fprintf(f, " gap %lu", (unsigned long)qopt.gap);