summaryrefslogtreecommitdiff
path: root/2003/netfilter-curdevel-ukuug2003/netfilter-curdevel-ukuug2003.tex
blob: 2659ae1cdb4259beda1dac1f2ed2df59e5b9879e (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
\documentstyle{seminar}
\begin{document}
\vspace{3mm}
\begin{slide}
\vspace{3mm}
\begin{center}
\vspace{3mm}
\vspace{3mm}
The future of Linux packet filtering\\
\vspace{3mm}
\vspace{3mm}
\vspace{3mm}
\end{center}
\begin{center}
by\\
\vspace{3mm}
Harald Welte $<$laforge@netfilter.org$>$\\
\vspace{3mm}
\vspace{3mm}
\end{center}
\end{slide}
\begin{slide}
Future of Linux packet filtering\\
Contents\\
\vspace{3mm}
\vspace{3mm}
	Problems with current 2.4/2.5 netfilter/iptables\\
		Solution to code replication\\
		Solution for dynamic rulesets\\
		Solution for API to GUIs and other management programs\\
\vspace{3mm}
	Other current work\\
		Optimizing Rule load time of large rulesets\\
		Making netfilter/iptables compatible with zerocopy tcp\\
\vspace{3mm}
	HA for stateful firewalling\\
		What's special about firewalling HA\\
		Poor man's failover\\
		Real state replication\\
\vspace{3mm}
\vspace{3mm}
\vspace{3mm}
\end{slide}
\begin{slide}
Future of Linux packet filtering\\
Problems with 2.4.x netfilter/iptables\\
\vspace{3mm}
	code replication between iptables/ip6tables/arptables\\
		iptables was never meant for other protocols, but people did copy+paste 'ports'\\
		replication of\\
			core kernel code\\
			layer 3 independent matches (mac, interface, ...)\\
			userspace library (libiptc)\\
			userspace tool (iptables)\\
			userspace plugins (libipt\_xxx.so)\\
\vspace{3mm}
	doesn't suit the needs for dynamically changing rulesets\\
		dynamic rulesets becoming more common (service selection, IDS)\\
		a whole table is created in userspace and sent as blob to kernel\\
		for every ruleset change the table needs to be copied to userspace and back\\
		inside kernel consistency checks on whole table, loop detection\\
\vspace{3mm}
	too extensible for writing any forward-compatible GUI\\
		new extensions showing up all the time\\
		a frontend would need to know about the options and use of a new extension\\
		thus frontends are always incomplete and out-of-date\\
		no high-level API other than piping to iptables-restore\\
\vspace{3mm}
\vspace{3mm}
\end{slide}
\begin{slide}
Future of Linux packet filtering\\
Reducing code replication\\
\vspace{3mm}
	code replication is a real problem: unclean, bugfixes missed\\
	we need layer 3 independent layer for\\
		submitting rules to the kernel\\
		traversing packet-rulesets supporting match/target modules\\
		registering matches/targets\\
			layer 3 specific (like matching ipv4 address)\\
			layer 3 independent (like matching MAC address)\\
\vspace{3mm}
	solution\\
		pkt\_tables inside kernel\\
			pkt\_tables\_ipv4 registers layer 3 handler with pkt\_tables\\
			pkt\_tables\_ipv6 registers layer 3 handler with pkt\_tables\\
			everybody registering a pkt\_table (like iptable\_filter) needs to specify the l3 protocol\\
		libraries in userspace (see later)\\
\vspace{3mm}
\vspace{3mm}
\end{slide}
\begin{slide}
Future of Linux packet filtering\\
Supporting dynamic rulesets\\
\vspace{3mm}
	atomic table-replacement turned out to be a bad idea\\
	need new interface for sending individual rules to kernel\\
	policy routing has the same problem and good solution: rtnetlink\\
	solution: nfnetlink\\
		multicast-netlink based packet-oriented socket between kernel and userspace\\
		has extra benefit that other userspace processes get notified of rule changes [just like routing daemons]\\
		nfnetlink will be low-layer below all kernel/userspace communication\\
			pkttnetlink [aka iptnetlink]\\
			ctnetlink\\
			ulog\\
			ip\_queue\\
\vspace{3mm}
\vspace{3mm}
\end{slide}
\begin{slide}
Future of Linux packet filtering\\
Communication with other programs\\
\vspace{3mm}
whole set of libraries\\
	libnfnetlink for low-layer communication\\
	libpkttnetlink for rule modifications\\
		will handle all plugins [which are currently part of iptables]\\
		query functions about available matches/targets\\
		query functions about parameters\\
		query functions for help messages about specific match/parameter of a match\\
		generic structure from which rules can be built\\
		conversion functions to parse generic structure into in-kernel structure\\
		conversion functions to parse kernel structure into generic structure\\
		functions to convert generic structure into plain text\\
	libipq will stay API-compatible to current version\\
	libipulog will stay API-compatible to current version\\
	libiptc will go away [compatibility layer extremely difficult]\\
\vspace{3mm}
\vspace{3mm}
\end{slide}
\begin{slide}
Future of Linux packet filtering\\
Optimizing rule load time\\
\vspace{3mm}
	Current situation\\
		loading 10,000 rules in 1,000 chains takes about 4 minutes on a PIII 733\,MHz\\
		this is caused by two bottlenecks\\
			loop detection algorithm on kernel side inefficient\\
			a couple of $O(n^2)$ complexity functions in libiptc\\
\vspace{3mm}
	Solution\\
		efficient loop detection and mark\_source\_chains() algorithm (graph coloring)\\
		current CVS libiptc with only one $O(n^2)$ function: 2 minutes 37 seconds\\
		whole reimplementation of libiptc needed for removing the last $O(n^2)$ function\\
\vspace{3mm}
\vspace{3mm}
\end{slide}
\begin{slide}
HA for netfilter/iptables\\
Optimizing the connection tracking code\\
\vspace{3mm}
	Conntrack hash function optimization\\
		old hash function not good for even hash bucket count\\
		hash function evaluation tool [cttest] available\\
		other hash functions in development (already in 2.4.21)\\
		introduce per-system randomness to prevent hash attack\\
		code optimization (locking/timers/...)\\
\vspace{3mm}
\vspace{3mm}
\vspace{3mm}
\end{slide}
\begin{slide}
Future of Linux packet filtering\\
netfilter and zerocopy TCP\\
\vspace{3mm}
	Current situation (2.4.x)\\
		skb\_linearize() at each netfilter hook effectively prevents zerocopy TCP from working if netfilter/iptables is enabled\\
		this is a big performance loss on stand-alone servers which filter packets locally\\
\vspace{3mm}
	Solution\\
		remove skb\_linearize() from conntrack, nat and ip\_tables core\\
		all iptables extensions and conntrack/nat helpers have to use skb\_copy\_bits() if they want to access data beyond layer 4 header\\
\vspace{3mm}
\vspace{3mm}
\end{slide}
\begin{slide}
HA for netfilter/iptables\\
Introduction\\
\vspace{3mm}
What is special about firewall failover?\\
\vspace{3mm}
	Nothing, in case of the stateless packet filter\\
		Common IP takeover solutions can be used\\
			VRRP\\
			Heartbeat\\
\vspace{3mm}
	Distribution of packet filtering ruleset no problem\\
		can be done manually\\
		or implemented with simple userspace process\\
\vspace{3mm}
	Problems arise with stateful packet filters\\
		Connection state only on active node\\
		NAT mappings only on active node\\
\vspace{3mm}
\vspace{3mm}
\vspace{3mm}
\end{slide}
\begin{slide}
HA for netfilter/iptables\\
Poor man's failover\\
\vspace{3mm}
Poor man's failover\\
	principle\\
		let every node do its own tracking rather than replicating state\\
	two possible implementations\\
		connect every node to shared media (i.e. real ethernet)\\
			forwarding only turned on on active node\\
			slave nodes use promiscuous mode to sniff packets\\
		copy all traffic to slave nodes\\
			active master needs to copy all traffic to other nodes\\
			disadvantage: high load, sync traffic == payload traffic\\
			IMHO stupid way of solving the problem \\
	advantages\\
		very easy implementation\\
			only addition of sniffing mode to conntrack needed\\
			existing means of address takeover can be used\\
		same load on active master and slave nodes\\
		no additional load on active master\\
	disadvantages\\
		can only be used with real shared media (no switches, ...)\\
		can not be used with NAT\\
	remaining problem\\
		no initial state sync after reboot of slave node!\\
\vspace{3mm}
\vspace{3mm}
\vspace{3mm}
\end{slide}
\begin{slide}
HA for netfilter/iptables\\
Real state replication\\
\vspace{3mm}
Parts needed\\
	state replication protocol\\
		multicast based\\
		sequence numbers for detection of packet loss\\
		NACK-based retransmission\\
		no security, since private ethernet segment to be used\\
	event interface on active node\\
		calling out to callback function at all state changes\\
	exported interface to manipulate conntrack hash table\\
	kernel thread for sending conntrack state protocol messages\\
		registers with event interface\\
		creates and accumulates state replication packets\\
		sends them via in-kernel sockets api\\
	kernel thread for receiving conntrack state replication messages\\
		receives state replication packets via in-kernel sockets\\
		uses conntrack hashtable manipulation interface\\
\vspace{3mm}
\vspace{3mm}
\vspace{3mm}
\end{slide}
\begin{slide}
HA for netfilter/iptables\\
Real state replication\\
\vspace{3mm}
	Flow of events in chronological order:\\
		on active node, inside the network RX softirq\\
			connection tracking code is analyzing a forwarded packet\\
			connection tracking gathers some new state information\\
			connection tracking updates local connection tracking database\\
			connection tracking sends event message to event API\\
		on active node, inside the conntrack-sync kernel thread\\
			conntrack sync daemon receives event through event API\\
			conntrack sync daemon aggregates multiple event messages into a state replication protocol message, removing possible redundancy\\
			conntrack sync daemon generates state replication protocol message\\
			conntrack sync daemon sends state replication protocol message\\
		on slave node(s), inside network RX softirq\\
			connection tracking code ignores packets coming from the interface attached to the private conntrack sync network\\
			state replication protocol message is appended to socket receive queue of conntrack-sync kernel thread\\
		on slave node(s), inside conntrack-sync kernel thread\\
			conntrack sync daemon receives state replication message\\
			conntrack sync daemon creates/updates conntrack entry\\
\vspace{3mm}
\vspace{3mm}
\end{slide}
\begin{slide}
HA for netfilter/iptables\\
Necessary changes to kernel\\
\vspace{3mm}
Necessary changes to current conntrack core\\
\vspace{3mm}
	event generation (callback functions) for all state changes\\
\vspace{3mm}
	conntrack hashtable manipulation API\\
		is needed (and already implemented) for 'ctnetlink' API\\
\vspace{3mm}
	conntrack exemptions\\
		needed to {\em not} track conntrack state replication packets\\
		is needed for other cases as well\\
		currently being developed by Jozsef Kadlecsik\\
\vspace{3mm}
\vspace{3mm}
\vspace{3mm}
\end{slide}
\begin{slide}
Future of Linux packet filtering\\
Thanks\\
		The slides of this presentation are available at http://www.gnumonks.org/\\
\vspace{3mm}
		Visit the netfilter homepage http://www.netfilter.org/\\
\vspace{3mm}
	Thanks to\\
		the BBS people, Z-Netz, FIDO, ...\\
			for heavily increasing my computer usage in 1992\\
		KNF\\
			for bringing me in touch with the internet as early as 1994\\
			for providing a playground for technical people\\
			for telling me about the existence of Linux!\\
		Alan Cox, Alexey Kuznetsov, David Miller, Andi Kleen\\
			for implementing (one of?) the world's best TCP/IP stacks\\
		Paul 'Rusty' Russell\\
			for starting the netfilter/iptables project\\
			for trusting me to maintain it today\\
		Astaro AG\\
			for sponsoring most of my current netfilter work\\
\vspace{3mm}
\end{slide}
\end{document}
personal git repositories of Harald Welte. Your mileage may vary