From 3a7e054d796f14365ff5fc167ae7cff0a02b9c0f Mon Sep 17 00:00:00 2001 From: Costa Tsaousis Date: Fri, 3 Apr 2026 17:33:23 +0300 Subject: [PATCH 1/9] Strengthen IPv4 regression fence before IPv6 implementation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add 6 new test cases covering previously weak IPv4 areas: - numeric forms (integer, octal, two-part, three-part via inet_aton) - boundary addresses (0.0.0.0, 255.255.255.255) - adjacency merge at IPv4 boundaries - default-mode IPv4 contract (no family flag) - numeric forms in ranges and CIDRs - max-edge wrap safety (overflow-safe operations near 255.255.255.255) Total test count: 71 → 77 --- tests.d/72-numeric-ipv4-forms/cmd.sh | 47 ++++++++++++++++++++ tests.d/72-numeric-ipv4-forms/output | 14 ++++++ tests.d/73-boundary-addresses/cmd.sh | 32 +++++++++++++ tests.d/73-boundary-addresses/output | 22 +++++++++ tests.d/74-boundary-adjacency-merge/cmd.sh | 39 ++++++++++++++++ tests.d/74-boundary-adjacency-merge/output | 23 ++++++++++ tests.d/75-default-mode-ipv4-contract/cmd.sh | 30 +++++++++++++ tests.d/75-default-mode-ipv4-contract/output | 18 ++++++++ tests.d/76-numeric-ranges/cmd.sh | 26 +++++++++++ tests.d/76-numeric-ranges/output | 16 +++++++ tests.d/77-max-edge-wrap-safety/cmd.sh | 31 +++++++++++++ tests.d/77-max-edge-wrap-safety/output | 17 +++++++ 12 files changed, 315 insertions(+) create mode 100755 tests.d/72-numeric-ipv4-forms/cmd.sh create mode 100644 tests.d/72-numeric-ipv4-forms/output create mode 100755 tests.d/73-boundary-addresses/cmd.sh create mode 100644 tests.d/73-boundary-addresses/output create mode 100755 tests.d/74-boundary-adjacency-merge/cmd.sh create mode 100644 tests.d/74-boundary-adjacency-merge/output create mode 100755 tests.d/75-default-mode-ipv4-contract/cmd.sh create mode 100644 tests.d/75-default-mode-ipv4-contract/output create mode 100755 tests.d/76-numeric-ranges/cmd.sh create mode 100644 tests.d/76-numeric-ranges/output create mode 
100755 tests.d/77-max-edge-wrap-safety/cmd.sh create mode 100644 tests.d/77-max-edge-wrap-safety/output diff --git a/tests.d/72-numeric-ipv4-forms/cmd.sh b/tests.d/72-numeric-ipv4-forms/cmd.sh new file mode 100755 index 0000000..eea320b --- /dev/null +++ b/tests.d/72-numeric-ipv4-forms/cmd.sh @@ -0,0 +1,47 @@ +#!/bin/bash +# Test inet_aton() numeric IPv4 forms that go through the IP parsing path: +# raw 32-bit integer, octal, two-part, three-part notation. +# These are accepted by the parser (digits + dots + slash characters) +# and passed to inet_aton() which handles all these forms. + +tmpdir=$(mktemp -d) +trap 'rm -rf "$tmpdir"' EXIT + +# --- Individual numeric forms --- +# raw integer: 167772161 = 10.0.0.1 +# octal: 012.0.0.1 = 10.0.0.1 +# two-part: 10.1 = 10.0.0.1 +# three-part: 10.0.1 = 10.0.0.1 +cat >"$tmpdir/input" <<'EOF' +167772161 +012.0.0.1 +10.1 +10.0.1 +EOF + +echo "# Numeric forms merged (all resolve to 10.0.0.1):" +../../iprange "$tmpdir/input" + +# --- Integer range --- +echo "# Integer range 167772160-167772163 = 10.0.0.0/30:" +echo "167772160 - 167772163" | ../../iprange + +# --- Octal CIDR --- +echo "# Octal CIDR 012.0.0.0/24 = 10.0.0.0/24:" +echo "012.0.0.0/24" | ../../iprange + +# --- Two-part CIDR --- +echo "# Two-part 10.0/16 = 10.0.0.0/16:" +echo "10.0/16" | ../../iprange + +# --- Count using integer notation --- +echo "# Count of integer 0/0 (entire IPv4 space):" +echo "0/0" | ../../iprange -C + +# --- Verify integer zero --- +echo "# Integer 0 = 0.0.0.0:" +echo "0" | ../../iprange + +# --- Verify max integer --- +echo "# Integer 4294967295 = 255.255.255.255:" +echo "4294967295" | ../../iprange diff --git a/tests.d/72-numeric-ipv4-forms/output b/tests.d/72-numeric-ipv4-forms/output new file mode 100644 index 0000000..56ee4c9 --- /dev/null +++ b/tests.d/72-numeric-ipv4-forms/output @@ -0,0 +1,14 @@ +# Numeric forms merged (all resolve to 10.0.0.1): +10.0.0.1 +# Integer range 167772160-167772163 = 10.0.0.0/30: +10.0.0.0/30 +# Octal CIDR 
012.0.0.0/24 = 10.0.0.0/24: +10.0.0.0/24 +# Two-part 10.0/16 = 10.0.0.0/16: +10.0.0.0/16 +# Count of integer 0/0 (entire IPv4 space): +1,4294967296 +# Integer 0 = 0.0.0.0: +0.0.0.0 +# Integer 4294967295 = 255.255.255.255: +255.255.255.255 diff --git a/tests.d/73-boundary-addresses/cmd.sh b/tests.d/73-boundary-addresses/cmd.sh new file mode 100755 index 0000000..209681a --- /dev/null +++ b/tests.d/73-boundary-addresses/cmd.sh @@ -0,0 +1,32 @@ +#!/bin/bash +# Test boundary address behavior: 0.0.0.0, 255.255.255.255, and edge ranges + +echo "# Single 0.0.0.0:" +echo "0.0.0.0" | ../../iprange + +echo "# Single 255.255.255.255:" +echo "255.255.255.255" | ../../iprange + +echo "# Range 0.0.0.0 - 0.0.0.0:" +echo "0.0.0.0 - 0.0.0.0" | ../../iprange + +echo "# Range 255.255.255.255 - 255.255.255.255:" +echo "255.255.255.255 - 255.255.255.255" | ../../iprange + +echo "# Full range 0.0.0.0 - 255.255.255.255:" +echo "0.0.0.0 - 255.255.255.255" | ../../iprange + +echo "# Count 0.0.0.0:" +echo "0.0.0.0" | ../../iprange -C + +echo "# Count 255.255.255.255:" +echo "255.255.255.255" | ../../iprange -C + +echo "# Count 0.0.0.0/0:" +echo "0.0.0.0/0" | ../../iprange -C + +echo "# Print ranges for boundary:" +printf "0.0.0.0\n255.255.255.255\n" | ../../iprange -j + +echo "# Print single IPs for boundaries:" +printf "0.0.0.0\n255.255.255.255\n" | ../../iprange -1 diff --git a/tests.d/73-boundary-addresses/output b/tests.d/73-boundary-addresses/output new file mode 100644 index 0000000..553f32c --- /dev/null +++ b/tests.d/73-boundary-addresses/output @@ -0,0 +1,22 @@ +# Single 0.0.0.0: +0.0.0.0 +# Single 255.255.255.255: +255.255.255.255 +# Range 0.0.0.0 - 0.0.0.0: +0.0.0.0 +# Range 255.255.255.255 - 255.255.255.255: +255.255.255.255 +# Full range 0.0.0.0 - 255.255.255.255: +0.0.0.0/0 +# Count 0.0.0.0: +1,1 +# Count 255.255.255.255: +1,1 +# Count 0.0.0.0/0: +1,4294967296 +# Print ranges for boundary: +0.0.0.0-0.0.0.0 +255.255.255.255-255.255.255.255 +# Print single IPs for boundaries: 
+0.0.0.0 +255.255.255.255 diff --git a/tests.d/74-boundary-adjacency-merge/cmd.sh b/tests.d/74-boundary-adjacency-merge/cmd.sh new file mode 100755 index 0000000..10b6f21 --- /dev/null +++ b/tests.d/74-boundary-adjacency-merge/cmd.sh @@ -0,0 +1,39 @@ +#!/bin/bash +# Test adjacency and merge behavior at IPv4 boundaries (0 and max) + +tmpdir=$(mktemp -d) +trap 'rm -rf "$tmpdir"' EXIT + +echo "# Adjacent at bottom (0.0.0.0 + 0.0.0.1 merge to /31):" +printf "0.0.0.0\n0.0.0.1\n" | ../../iprange + +echo "# Adjacent at top (255.255.255.254 + 255.255.255.255 merge to /31):" +printf "255.255.255.254\n255.255.255.255\n" | ../../iprange + +echo "# Four at bottom merge to /30:" +printf "0.0.0.0\n0.0.0.1\n0.0.0.2\n0.0.0.3\n" | ../../iprange + +echo "# Four at top merge to /30:" +printf "255.255.255.252\n255.255.255.253\n255.255.255.254\n255.255.255.255\n" | ../../iprange + +# Exclude top from full range +echo "0.0.0.0/0" >"$tmpdir/full" +echo "255.255.255.255" >"$tmpdir/top" +echo "0.0.0.0" >"$tmpdir/bottom" +printf "0.0.0.0\n255.255.255.255\n" >"$tmpdir/both" +echo "255.255.255.255" >"$tmpdir/toponly" + +echo "# Exclude top from full range:" +../../iprange "$tmpdir/full" --except "$tmpdir/top" | tail -3 + +echo "# Exclude bottom from full range:" +../../iprange "$tmpdir/full" --except "$tmpdir/bottom" | head -3 + +echo "# Common of {0,max} and {max}:" +../../iprange "$tmpdir/both" --common "$tmpdir/toponly" + +echo "# Diff of {0} vs {max}:" +../../iprange "$tmpdir/bottom" --diff "$tmpdir/top" + +echo "# Count adjacent merge at top:" +printf "255.255.255.254\n255.255.255.255\n" | ../../iprange -C diff --git a/tests.d/74-boundary-adjacency-merge/output b/tests.d/74-boundary-adjacency-merge/output new file mode 100644 index 0000000..ed2edb3 --- /dev/null +++ b/tests.d/74-boundary-adjacency-merge/output @@ -0,0 +1,23 @@ +# Adjacent at bottom (0.0.0.0 + 0.0.0.1 merge to /31): +0.0.0.0/31 +# Adjacent at top (255.255.255.254 + 255.255.255.255 merge to /31): +255.255.255.254/31 +# 
Four at bottom merge to /30: +0.0.0.0/30 +# Four at top merge to /30: +255.255.255.252/30 +# Exclude top from full range: +255.255.255.248/30 +255.255.255.252/31 +255.255.255.254 +# Exclude bottom from full range: +0.0.0.1 +0.0.0.2/31 +0.0.0.4/30 +# Common of {0,max} and {max}: +255.255.255.255 +# Diff of {0} vs {max}: +0.0.0.0 +255.255.255.255 +# Count adjacent merge at top: +1,2 diff --git a/tests.d/75-default-mode-ipv4-contract/cmd.sh b/tests.d/75-default-mode-ipv4-contract/cmd.sh new file mode 100755 index 0000000..9a98567 --- /dev/null +++ b/tests.d/75-default-mode-ipv4-contract/cmd.sh @@ -0,0 +1,30 @@ +#!/bin/bash +# Test that default mode (no family flag) behaves as IPv4. +# This is the future-proofing contract: once -4/-6 exist, +# the default must still be IPv4 for backward compatibility. + +tmpdir=$(mktemp -d) +trap 'rm -rf "$tmpdir"' EXIT + +echo "# Default prefix is /32:" +echo "10.0.0.1" | ../../iprange -C + +echo "# Default merge of standard dotted-quad:" +printf "192.168.1.0/24\n192.168.2.0/24\n" | ../../iprange + +echo "# Default count-unique-all:" +printf "10.0.0.0/24\n" >"$tmpdir/a" +printf "10.0.1.0/24\n" >"$tmpdir/b" +../../iprange --count-unique-all --header "$tmpdir/a" as setA "$tmpdir/b" as setB + +echo "# Default compare-next:" +../../iprange --header "$tmpdir/a" as setA --compare-next "$tmpdir/b" as setB + +echo "# Default range parsing:" +echo "10.0.0.1 - 10.0.0.10" | ../../iprange -j + +echo "# Default CIDR with netmask notation:" +echo "10.0.0.0/255.255.255.0" | ../../iprange -C + +echo "# Default --dont-fix-network:" +echo "10.0.0.5/24" | ../../iprange --dont-fix-network -j diff --git a/tests.d/75-default-mode-ipv4-contract/output b/tests.d/75-default-mode-ipv4-contract/output new file mode 100644 index 0000000..7f7db1d --- /dev/null +++ b/tests.d/75-default-mode-ipv4-contract/output @@ -0,0 +1,18 @@ +# Default prefix is /32: +1,1 +# Default merge of standard dotted-quad: +192.168.1.0/24 +192.168.2.0/24 +# Default count-unique-all: 
+name,entries,unique_ips +setA,1,256 +setB,1,256 +# Default compare-next: +name1,name2,entries1,entries2,ips1,ips2,combined_ips,common_ips +setA,setB,1,1,256,256,512,0 +# Default range parsing: +10.0.0.1-10.0.0.10 +# Default CIDR with netmask notation: +1,256 +# Default --dont-fix-network: +10.0.0.5-10.0.0.255 diff --git a/tests.d/76-numeric-ranges/cmd.sh b/tests.d/76-numeric-ranges/cmd.sh new file mode 100755 index 0000000..331ab29 --- /dev/null +++ b/tests.d/76-numeric-ranges/cmd.sh @@ -0,0 +1,26 @@ +#!/bin/bash +# Test numeric forms in ranges and CIDR combinations + +echo "# Integer range:" +echo "167772160 - 167772163" | ../../iprange + +echo "# Octal range:" +echo "012.0.0.0 - 012.0.0.3" | ../../iprange + +echo "# Mixed numeric: octal start, dotted end:" +echo "012.0.0.0 - 10.0.0.3" | ../../iprange + +echo "# Two-part range:" +echo "10.0 - 10.3" | ../../iprange + +echo "# Integer CIDR:" +echo "167772160/30" | ../../iprange + +echo "# Octal CIDR count:" +echo "012.0.0.0/24" | ../../iprange -C + +echo "# Integer zero with prefix 0:" +echo "0/0" | ../../iprange -C + +echo "# Large integer as single IP:" +echo "3232235777" | ../../iprange diff --git a/tests.d/76-numeric-ranges/output b/tests.d/76-numeric-ranges/output new file mode 100644 index 0000000..20f8e47 --- /dev/null +++ b/tests.d/76-numeric-ranges/output @@ -0,0 +1,16 @@ +# Integer range: +10.0.0.0/30 +# Octal range: +10.0.0.0/30 +# Mixed numeric: octal start, dotted end: +10.0.0.0/30 +# Two-part range: +10.0.0.0/30 +# Integer CIDR: +10.0.0.0/30 +# Octal CIDR count: +1,256 +# Integer zero with prefix 0: +1,4294967296 +# Large integer as single IP: +192.168.1.1 diff --git a/tests.d/77-max-edge-wrap-safety/cmd.sh b/tests.d/77-max-edge-wrap-safety/cmd.sh new file mode 100755 index 0000000..c4e5ca2 --- /dev/null +++ b/tests.d/77-max-edge-wrap-safety/cmd.sh @@ -0,0 +1,31 @@ +#!/bin/bash +# Test that operations near the max IPv4 address (255.255.255.255) +# handle wrap-around safely without overflow or 
underflow. + +tmpdir=$(mktemp -d) +trap 'rm -rf "$tmpdir"' EXIT + +echo "# Optimize preserves max-adjacent pair:" +printf "255.255.255.254\n255.255.255.255\n" | ../../iprange + +echo "# Binary round-trip of max address:" +echo "255.255.255.255" | ../../iprange --print-binary | ../../iprange + +echo "# Binary round-trip of max /31:" +printf "255.255.255.254\n255.255.255.255\n" | ../../iprange --print-binary | ../../iprange + +echo "# Exclude max from max /24:" +echo "255.255.255.0/24" >"$tmpdir/net" +echo "255.255.255.255" >"$tmpdir/top" +../../iprange "$tmpdir/net" --except "$tmpdir/top" | tail -1 + +echo "# Exclude everything except max:" +echo "0.0.0.0/0" >"$tmpdir/all" +echo "0.0.0.0 - 255.255.255.254" >"$tmpdir/below" +../../iprange "$tmpdir/all" --except "$tmpdir/below" + +echo "# Count of /31 at max:" +printf "255.255.255.254/31\n" | ../../iprange -C + +echo "# Single IP output of max /30:" +printf "255.255.255.252/30\n" | ../../iprange -1 diff --git a/tests.d/77-max-edge-wrap-safety/output b/tests.d/77-max-edge-wrap-safety/output new file mode 100644 index 0000000..ec03014 --- /dev/null +++ b/tests.d/77-max-edge-wrap-safety/output @@ -0,0 +1,17 @@ +# Optimize preserves max-adjacent pair: +255.255.255.254/31 +# Binary round-trip of max address: +255.255.255.255 +# Binary round-trip of max /31: +255.255.255.254/31 +# Exclude max from max /24: +255.255.255.254 +# Exclude everything except max: +255.255.255.255 +# Count of /31 at max: +1,2 +# Single IP output of max /30: +255.255.255.252 +255.255.255.253 +255.255.255.254 +255.255.255.255 From 3c8c59dfe1aed460ef7d4cc374cb2bfd04f44d2b Mon Sep 17 00:00:00 2001 From: Costa Tsaousis Date: Fri, 3 Apr 2026 18:34:41 +0300 Subject: [PATCH 2/9] Add IPv6 support with --ipv4/-4 and --ipv6/-6 family flags IPv6 support using __uint128_t for 128-bit address representation. One active family per invocation; normalization at input boundary. 
Family contract: - Default (no flag): IPv4 mode for backward compatibility - --ipv4 / -4: explicit IPv4 mode - --ipv6 / -6: IPv6 mode, accepts both IPv6 and IPv4 input (IPv4 normalized to ::ffff:x.x.x.x mapped addresses) Implementation: - iprange6.h: IPv6 types, helpers (netmask6, broadcast6, etc.) - ipset6.*: IPv6 container with all set operations (optimize, merge, common, exclude, diff, combine, copy) - ipset6_load.c: IPv6 parser with mixed-family detection, DNS resolution for both AAAA and A records - ipset6_print.c: IPv6 CIDR decomposition (0..128 prefixes), range/single-IP output, 128-bit decimal formatting - ipset6_binary.c: Binary format v2 with family header, keeps v1 reading for IPv4 - iprange6_main.c: Complete IPv6 mode execution path - iprange.c: CLI flags, family routing, --has-ipv6 detection All modes supported in IPv6: merge, common, exclude, diff, compare, compare-first, compare-next, count-unique, count-unique-all, print-binary, print-ranges, print-single-ips. Tests: 88 total (77 IPv4 + 11 IPv6), all passing. 
--- Makefile.am | 29 + README.md | 46 +- src/iprange.c | 84 ++- src/iprange6.h | 186 ++++++ src/iprange6_main.c | 500 +++++++++++++++ src/ipset6.c | 94 +++ src/ipset6.h | 103 +++ src/ipset6_binary.c | 289 +++++++++ src/ipset6_binary.h | 11 + src/ipset6_combine.c | 36 ++ src/ipset6_common.c | 80 +++ src/ipset6_copy.c | 26 + src/ipset6_diff.c | 132 ++++ src/ipset6_exclude.c | 111 ++++ src/ipset6_load.c | 603 ++++++++++++++++++ src/ipset6_load.h | 8 + src/ipset6_merge.c | 33 + src/ipset6_optimize.c | 74 +++ src/ipset6_print.c | 206 ++++++ src/ipset6_print.h | 17 + tests.d/78-ipv6-basic-merge/cmd.sh | 4 + tests.d/78-ipv6-basic-merge/output | 3 + tests.d/79-ipv6-cidr-decomposition/cmd.sh | 20 + tests.d/79-ipv6-cidr-decomposition/output | 15 + tests.d/80-ipv6-set-operations/cmd.sh | 22 + tests.d/80-ipv6-set-operations/output | 10 + .../81-ipv6-ipv4-mapped-normalization/cmd.sh | 17 + .../81-ipv6-ipv4-mapped-normalization/output | 11 + tests.d/82-ipv6-binary-roundtrip/cmd.sh | 14 + tests.d/82-ipv6-binary-roundtrip/output | 10 + tests.d/83-ipv6-count-compare/cmd.sh | 23 + tests.d/83-ipv6-count-compare/output | 13 + tests.d/84-ipv6-single-ips-cap/cmd.sh | 11 + tests.d/84-ipv6-single-ips-cap/output | 9 + tests.d/85-ipv6-default-mode-is-ipv4/cmd.sh | 14 + tests.d/85-ipv6-default-mode-is-ipv4/output | 8 + tests.d/86-ipv6-mixed-family-rejection/cmd.sh | 18 + tests.d/86-ipv6-mixed-family-rejection/output | 2 + tests.d/87-ipv6-boundary-addresses/cmd.sh | 23 + tests.d/87-ipv6-boundary-addresses/output | 14 + tests.d/88-ipv6-has-ipv6-flag/cmd.sh | 12 + tests.d/88-ipv6-has-ipv6-flag/output | 2 + 42 files changed, 2938 insertions(+), 5 deletions(-) create mode 100644 src/iprange6.h create mode 100644 src/iprange6_main.c create mode 100644 src/ipset6.c create mode 100644 src/ipset6.h create mode 100644 src/ipset6_binary.c create mode 100644 src/ipset6_binary.h create mode 100644 src/ipset6_combine.c create mode 100644 src/ipset6_common.c create mode 100644 src/ipset6_copy.c create mode 
100644 src/ipset6_diff.c create mode 100644 src/ipset6_exclude.c create mode 100644 src/ipset6_load.c create mode 100644 src/ipset6_load.h create mode 100644 src/ipset6_merge.c create mode 100644 src/ipset6_optimize.c create mode 100644 src/ipset6_print.c create mode 100644 src/ipset6_print.h create mode 100755 tests.d/78-ipv6-basic-merge/cmd.sh create mode 100644 tests.d/78-ipv6-basic-merge/output create mode 100755 tests.d/79-ipv6-cidr-decomposition/cmd.sh create mode 100644 tests.d/79-ipv6-cidr-decomposition/output create mode 100755 tests.d/80-ipv6-set-operations/cmd.sh create mode 100644 tests.d/80-ipv6-set-operations/output create mode 100755 tests.d/81-ipv6-ipv4-mapped-normalization/cmd.sh create mode 100644 tests.d/81-ipv6-ipv4-mapped-normalization/output create mode 100755 tests.d/82-ipv6-binary-roundtrip/cmd.sh create mode 100644 tests.d/82-ipv6-binary-roundtrip/output create mode 100755 tests.d/83-ipv6-count-compare/cmd.sh create mode 100644 tests.d/83-ipv6-count-compare/output create mode 100755 tests.d/84-ipv6-single-ips-cap/cmd.sh create mode 100644 tests.d/84-ipv6-single-ips-cap/output create mode 100755 tests.d/85-ipv6-default-mode-is-ipv4/cmd.sh create mode 100644 tests.d/85-ipv6-default-mode-is-ipv4/output create mode 100755 tests.d/86-ipv6-mixed-family-rejection/cmd.sh create mode 100644 tests.d/86-ipv6-mixed-family-rejection/output create mode 100755 tests.d/87-ipv6-boundary-addresses/cmd.sh create mode 100644 tests.d/87-ipv6-boundary-addresses/output create mode 100755 tests.d/88-ipv6-has-ipv6-flag/cmd.sh create mode 100644 tests.d/88-ipv6-has-ipv6-flag/output diff --git a/Makefile.am b/Makefile.am index 09f12d7..aa292bb 100644 --- a/Makefile.am +++ b/Makefile.am @@ -33,8 +33,25 @@ endif iprange_SOURCES = \ src/iprange.c \ src/iprange.h \ + src/iprange6.h \ + src/iprange6_main.c \ src/ipset.c \ src/ipset.h \ + src/ipset6.c \ + src/ipset6.h \ + src/ipset6_binary.c \ + src/ipset6_binary.h \ + src/ipset6_combine.c \ + src/ipset6_common.c \ + 
src/ipset6_copy.c \ + src/ipset6_diff.c \ + src/ipset6_exclude.c \ + src/ipset6_load.c \ + src/ipset6_load.h \ + src/ipset6_merge.c \ + src/ipset6_optimize.c \ + src/ipset6_print.c \ + src/ipset6_print.h \ src/ipset_binary.c \ src/ipset_binary.h \ src/ipset_combine.c \ @@ -61,7 +78,19 @@ iprange_SOURCES = \ VPATH_LOCAL_OBJECTS = \ src/iprange.$(OBJEXT) \ + src/iprange6_main.$(OBJEXT) \ src/ipset.$(OBJEXT) \ + src/ipset6.$(OBJEXT) \ + src/ipset6_binary.$(OBJEXT) \ + src/ipset6_combine.$(OBJEXT) \ + src/ipset6_common.$(OBJEXT) \ + src/ipset6_copy.$(OBJEXT) \ + src/ipset6_diff.$(OBJEXT) \ + src/ipset6_exclude.$(OBJEXT) \ + src/ipset6_load.$(OBJEXT) \ + src/ipset6_merge.$(OBJEXT) \ + src/ipset6_optimize.$(OBJEXT) \ + src/ipset6_print.$(OBJEXT) \ src/ipset_binary.$(OBJEXT) \ src/ipset_combine.$(OBJEXT) \ src/ipset_common.$(OBJEXT) \ diff --git a/README.md b/README.md index e7f44e6..6addb61 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # iprange -`iprange` is a fast command-line tool for reading, normalizing, comparing, and exporting IPv4 address sets. +`iprange` is a fast command-line tool for reading, normalizing, comparing, and exporting IPv4 and IPv6 address sets. It understands single IPs, CIDRs, netmasks, numeric IPs, ranges, and hostnames. You can use it to merge blocklists, compute intersections or exclusions, generate data for `ipset restore`, or compare multiple IP sets as CSV. @@ -23,11 +23,26 @@ It understands single IPs, CIDRs, netmasks, numeric IPs, ranges, and hostnames. - numeric IPs - hostnames +In IPv6 mode (`-6`), it additionally accepts: + +- IPv6 addresses + - `2001:db8::1` +- IPv6 CIDRs + - `2001:db8::/32` +- IPv6 ranges + - `2001:db8::1 - 2001:db8::ff` +- compressed and full notation + - `::1`, `2001:0db8:0000:0000:0000:0000:0000:0001` +- IPv4-mapped IPv6 + - `::ffff:10.0.0.1` +- plain IPv4 (normalized to `::ffff:x.x.x.x` in IPv6 mode) + Important input behavior: - Hostnames are resolved in parallel. 
- Comments after `#` or `;` are ignored. -- Parsing uses `inet_aton()`, so octal and hex forms are accepted too. +- In IPv4 mode (default), parsing uses `inet_aton()`, so octal and hex forms are accepted too. +- In IPv6 mode, parsing uses `inet_pton(AF_INET6)`. - Inputs can come from `stdin`, files, file lists, or directory expansion. ## Main modes @@ -97,6 +112,33 @@ Generate `ipset restore`-style lines: iprange --print-prefix 'add myset ' --print-suffix '' blocklist.txt ``` +## Address family + +By default, `iprange` operates in IPv4 mode. Use `-6` / `--ipv6` for IPv6: + +```bash +# IPv6 merge +iprange -6 blocklist-v6.txt + +# IPv6 count +iprange -6 -C blocklist-v6.txt + +# IPv4 input normalized to mapped IPv6 +echo "10.0.0.1" | iprange -6 +# output: ::ffff:10.0.0.1 + +# Explicit IPv4 mode (same as default) +iprange -4 blocklist.txt +``` + +Key rules: +- Without `-4` or `-6`, text input defaults to IPv4 mode. +- In IPv6 mode, plain IPv4 input is accepted and normalized to `::ffff:x.x.x.x`. +- Operations between IPv4 and IPv6 datasets are not supported. +- Mixed-family range endpoints (e.g., `10.0.0.1 - 2001:db8::1`) are invalid. +- Binary files declare their family in the header. +- Feature detection: `iprange --has-ipv6` exits with 0 if IPv6 is supported. + ## Build and install From a release tarball: diff --git a/src/iprange.c b/src/iprange.c index c1b7fa8..c913d2a 100644 --- a/src/iprange.c +++ b/src/iprange.c @@ -4,6 +4,11 @@ * Copyright (C) 2003 Gabriel L. 
Somlo */ #include +#include +#include +#include +#include +#include #include #include #include @@ -13,6 +18,17 @@ int debug; int cidr_use_network = 1; int default_prefix = 32; +/* address family: 0 = default (IPv4), 4 = explicit IPv4, 6 = explicit IPv6 */ +int active_family = 0; + +/* count of IPv6 lines dropped in IPv4 mode (for one-time warning) */ +unsigned long ipv6_dropped_in_ipv4_mode = 0; + +/* forward declaration for IPv6 mode execution */ +extern int iprange6_run(int argc, char **argv, int mode, IPSET_PRINT_CMD print, + int header, int quiet, size_t ipset_reduce_factor, + size_t ipset_reduce_min_accepted); + static inline uint64_t ipset_report_unique_ips(ipset *ips, size_t *entries) { uint64_t unique_ips = ipset_unique_ips(ips); @@ -37,6 +53,25 @@ static void usage(const char *me) { "Options:\n" "multiple options are aliases\n" "\n" + "Address family:\n" + " --ipv4\n" + " -4\n" + " > Force IPv4 mode.\n" + " Only IPv4 addresses are accepted.\n" + " IPv4-mapped IPv6 (::ffff:x.x.x.x) is\n" + " converted back to IPv4. All other IPv6 input\n" + " is dropped with a single warning.\n" + " This is the default for text input.\n" + "\n" + " --ipv6\n" + " -6\n" + " > Force IPv6 mode.\n" + " Both IPv6 and IPv4 addresses are accepted.\n" + " IPv4 input is normalized to IPv4-mapped IPv6\n" + " (::ffff:x.x.x.x). 
Hostnames are resolved for\n" + " both AAAA and A records.\n" + "\n" + "\n" "CIDR output modes:\n" " --optimize\n" " --combine\n" @@ -349,6 +384,17 @@ static void ipset_chain_append(ipset **head, ipset **tail, ipset *ips) *tail = ips; } +static void ipset6_chain_append(ipset6 **head, ipset6 **tail, ipset6 *ips) +{ + ips->next = NULL; + ips->prev = *tail; + + if(*tail) (*tail)->next = ips; + else *head = ips; + + *tail = ips; +} + static int compare_pathnames(const void *left, const void *right) { const char * const *a = left; @@ -506,6 +552,14 @@ int main(int argc, char **argv) { ipset_reduce_min_accepted = parse_size_option_or_die(option, value, 0, SIZE_MAX, "It must be a non-negative integer."); mode = MODE_REDUCE; } + else if(!strcmp(argv[i], "--ipv4") + || !strcmp(argv[i], "-4")) { + active_family = 4; + } + else if(!strcmp(argv[i], "--ipv6") + || !strcmp(argv[i], "-6")) { + active_family = 6; + } else if(!strcmp(argv[i], "--optimize") || !strcmp(argv[i], "--combine") || !strcmp(argv[i], "--merge") @@ -526,7 +580,7 @@ int main(int argc, char **argv) { || !strcmp(argv[i], "--complement")) { mode = MODE_EXCLUDE_NEXT; read_second = 1; - if(!root) { + if(active_family != 6 && !root) { fprintf(stderr, "%s: An ipset is needed before --except\n", PROG); exit(1); } @@ -535,7 +589,7 @@ int main(int argc, char **argv) { || !strcmp(argv[i], "--diff-next")) { mode = MODE_DIFF; read_second = 1; - if(!root) { + if(active_family != 6 && !root) { fprintf(stderr, "%s: An ipset is needed before --diff\n", PROG); exit(1); } @@ -549,7 +603,7 @@ int main(int argc, char **argv) { else if(!strcmp(argv[i], "--compare-next")) { mode = MODE_COMPARE_NEXT; read_second = 1; - if(!root) { + if(active_family != 6 && !root) { fprintf(stderr, "%s: An ipset is needed before --compare-next\n", PROG); exit(1); } @@ -639,7 +693,23 @@ int main(int argc, char **argv) { fprintf(stderr, "yes, @filename and @directory support is present.\n"); exit(0); } + else if(!strcmp(argv[i], "--has-ipv6")) { + 
fprintf(stderr, "yes, IPv6 support is present.\n"); + exit(0); + } else { + /* In IPv6 mode, skip IPv4 loading — iprange6_run() handles it */ + if(active_family == 6) { + /* still need to handle 'as NAME' and positional state, but skip loading */ + if(strcmp(argv[i], "-") != 0 && argv[i][0] != '@') { + /* skip 'as NAME' after regular files */ + if(i+1 < argc && !strcmp(argv[i+1], "as") && i+2 < argc) + i += 2; + } + inputs++; + continue; + } + if(!strcmp(argv[i], "-")) { inputs++; if(!(ips = ipset_load(NULL))) { @@ -842,6 +912,14 @@ int main(int argc, char **argv) { } } + /* IPv6 mode: delegate to the IPv6 execution path */ + if(active_family == 6) { + gettimeofday(&load_dt, NULL); + ret = iprange6_run(argc, argv, mode, print, header, quiet, + ipset_reduce_factor, ipset_reduce_min_accepted); + exit(ret); + } + /* * if no ipset was given on the command line * assume stdin, regardless of whether other options were specified diff --git a/src/iprange6.h b/src/iprange6.h new file mode 100644 index 0000000..6ed1722 --- /dev/null +++ b/src/iprange6.h @@ -0,0 +1,186 @@ +#ifndef IPRANGE_IPRANGE6_H +#define IPRANGE_IPRANGE6_H + +#include "iprange.h" +#include + +/* IPv6 address type: 128-bit unsigned integer in host byte order */ +typedef __uint128_t ipv6_addr_t; + +/* IPv6 network address type: one field for the net address, one for broadcast */ +typedef struct network_addr6 { + ipv6_addr_t addr; + ipv6_addr_t broadcast; +} network_addr6_t; + +/* Maximum IPv6 address */ +#define IPV6_ADDR_MAX ((ipv6_addr_t)((__uint128_t)(-1))) + +/* IPv4-mapped IPv6 prefix: ::ffff:0:0/96 */ +#define IPV6_MAPPED_PREFIX ((ipv6_addr_t)0xFFFF00000000ULL) +#define IPV6_MAPPED_MASK ((ipv6_addr_t)0xFFFFFFFFULL) + +#define IP6STR_MAX_LEN 46 + +/*----------------------------------------------------------------------*/ +/* Convert between struct in6_addr (network byte order) and ipv6_addr_t */ +/* (host byte order, big-endian logical order: MSB first) */ 
+/*----------------------------------------------------------------------*/
+
+static inline ipv6_addr_t in6_addr_to_ipv6(const struct in6_addr *in6) {
+    ipv6_addr_t result = 0;
+    int i;
+    for(i = 0; i < 16; i++) /* s6_addr[0] ends up in the most significant byte */
+        result = (result << 8) | in6->s6_addr[i];
+    return result;
+}
+
+static inline void ipv6_to_in6_addr(ipv6_addr_t addr, struct in6_addr *in6) {
+    int i;
+    for(i = 15; i >= 0; i--) { /* least significant byte is stored in s6_addr[15] */
+        in6->s6_addr[i] = (uint8_t)(addr & 0xFF);
+        addr >>= 8;
+    }
+}
+
+/*----------------------------------------------*/
+/* Compute netmask for IPv6 given prefix length */
+/*----------------------------------------------*/
+static inline ipv6_addr_t netmask6(int prefix) {
+    if(prefix <= 0) /* <= also rejects negatives, for which the shift below is undefined */
+        return (ipv6_addr_t)0;
+    if(prefix >= 128)
+        return IPV6_ADDR_MAX;
+    return IPV6_ADDR_MAX << (128 - prefix);
+}
+
+/*----------------------------------------------------*/
+/* Compute broadcast address given address and prefix */
+/*----------------------------------------------------*/
+static inline ipv6_addr_t broadcast6(ipv6_addr_t addr, int prefix) {
+    return addr | ~netmask6(prefix);
+}
+
+/*--------------------------------------------------*/
+/* Compute network address given address and prefix */
+/*--------------------------------------------------*/
+static inline ipv6_addr_t network6(ipv6_addr_t addr, int prefix) {
+    return addr & netmask6(prefix);
+}
+
+/*------------------------------------------------------------------*/
+/* Set a bit to a given value (0 or 1); MSB is bit 1, LSB is bit 128 */
+/*------------------------------------------------------------------*/
+static inline ipv6_addr_t set_bit6(ipv6_addr_t addr, int bitno, int val) {
+    if(val)
+        return addr | ((__uint128_t)1 << (128 - bitno)); /* bitno outside 1..128 makes this shift undefined */
+    else
+        return addr & ~((__uint128_t)1 << (128 - bitno));
+}
+
+/*-----------------------------------------------------------*/
+/* Format an IPv6 address into buf (IP6STR_MAX_LEN bytes) using inet_ntop */
+/*-----------------------------------------------------------*/
+static inline char *ip6str_r(char *buf, ipv6_addr_t addr) {
+    struct in6_addr in6;
+    ipv6_to_in6_addr(addr, &in6);
+    inet_ntop(AF_INET6, &in6, buf, IP6STR_MAX_LEN);
+    return buf;
+}
+
+/*-----------------------------------------------------------*/
+/* Parse an IPv6 address string using inet_pton */
+/* Returns 1 on success, 0 on failure (*addr is left untouched) */
+/*-----------------------------------------------------------*/
+static inline int str_to_ipv6(const char *str, ipv6_addr_t *addr) {
+    struct in6_addr in6;
+    if(inet_pton(AF_INET6, str, &in6) != 1)
+        return 0;
+    *addr = in6_addr_to_ipv6(&in6);
+    return 1;
+}
+
+/*-----------------------------------------------------------*/
+/* Parse "ADDR[/PREFIX]"; ipstr is split in place at the '/'. On error: */
+/* logs to stderr, increments *err (if non-NULL), returns an all-zero pair */
+/*-----------------------------------------------------------*/
+static inline network_addr6_t str2netaddr6(char *ipstr, int *err) {
+    long prefix = 128; /* long, so strtol()'s result is range-checked before narrowing to int */
+    char *prefixstr, *prefixend;
+    network_addr6_t netaddr;
+    ipv6_addr_t addr;
+
+    if((prefixstr = strchr(ipstr, '/'))) {
+        *prefixstr = '\0'; /* terminate the address part */
+        prefixstr++;
+        errno = 0;
+        prefix = strtol(prefixstr, &prefixend, 10); /* unlike atoi(): reports overflow and where parsing stopped */
+        if(unlikely(errno || prefixend == prefixstr || *prefixend != '\0' || prefix < 0 || prefix > 128)) {
+            if(err) (*err)++;
+            fprintf(stderr, "%s: Invalid IPv6 prefix /%s\n", PROG, prefixstr);
+            netaddr.addr = 0;
+            netaddr.broadcast = 0;
+            return netaddr;
+        }
+    }
+
+    if(!str_to_ipv6(ipstr, &addr)) { /* inet_pton: full notation, compressed, dotted-tail mapped */
+        if(err) (*err)++;
+        fprintf(stderr, "%s: Invalid IPv6 address %s\n", PROG, ipstr);
+        netaddr.addr = 0;
+        netaddr.broadcast = 0;
+        return netaddr;
+    }
+
+    if(likely(cidr_use_network))
+        netaddr.addr = network6(addr, (int)prefix);
+    else
+        netaddr.addr = addr; /* keep the host bits of the given address as range start */
+
+    netaddr.broadcast = broadcast6(netaddr.addr, (int)prefix);
+    return netaddr;
+}
+
+/*-----------------------------------------------------------*/
+/* Check if an IPv6 address is IPv4-mapped (::ffff:x.x.x.x) */
+/*-----------------------------------------------------------*/
+static inline int is_ipv4_mapped(ipv6_addr_t addr) {
+    return (addr >> 32) == 0xFFFF;
+}
+
+/*-----------------------------------------------------------*/
+/* Build ::ffff:a.b.c.d from an IPv4 value (presumably host byte order - confirm) */
+/*-----------------------------------------------------------*/
+static inline ipv6_addr_t ipv4_to_mapped6(in_addr_t ipv4) {
+    return (ipv6_addr_t)ipv4 | IPV6_MAPPED_PREFIX;
+}
+
+/*-----------------------------------------------------------*/
+/* Return the low 32 bits of addr; does not check that addr is IPv4-mapped */
+/*-----------------------------------------------------------*/
+static inline in_addr_t mapped6_to_ipv4(ipv6_addr_t addr) {
+    return (in_addr_t)(IPV6_MAPPED_MASK & addr);
+}
+
+/*-----------------------------------------------------------*/
+/* Render a 128-bit unsigned integer as decimal text at the END of buf */
+/* Returns a pointer to the first digit, which lies inside buf */
+/* 2^128-1 has 39 digits, so buflen >= 40 (digits + NUL) always fits */
+/*-----------------------------------------------------------*/
+static inline char *u128_to_dec(char *buf, size_t buflen, __uint128_t val) {
+    char *cursor = buf + buflen; /* one past the end; digits are laid down right to left */
+
+    *(--cursor) = '\0';
+
+    /* do/while rather than while: a zero value must still emit one '0' digit */
+    do {
+        *(--cursor) = '0' + (char)(val % 10);
+        val /= 10;
+    } while(val > 0);
+
+    /* no bounds check is made against buf; the caller must size it */
+    /* cursor now addresses the most significant digit */
+    return cursor;
+}
+
+#endif /* IPRANGE_IPRANGE6_H */
diff --git a/src/iprange6_main.c b/src/iprange6_main.c
new file mode 100644
index 0000000..2776dcf
--- /dev/null
+++ b/src/iprange6_main.c
@@ -0,0 +1,500 @@
+/*
+ * IPv6 mode main execution.
+ * Called from iprange.c when active_family == 6.
+ * Re-scans argv for file arguments and processes them with ipset6.
+ */ + +#include "iprange.h" +#include "iprange6.h" +#include "ipset6.h" +#include "ipset6_print.h" +#include "ipset6_binary.h" +#include "ipset6_load.h" +#include +#include + +extern int active_family; +extern unsigned long ipv6_dropped_in_ipv4_mode; + +static void ipset6_chain_append_local(ipset6 **head, ipset6 **tail, ipset6 *ips) +{ + ips->next = NULL; + ips->prev = *tail; + + if(*tail) (*tail)->next = ips; + else *head = ips; + + *tail = ips; +} + +static int compare_pathnames6(const void *left, const void *right) +{ + const char * const *a = left; + const char * const *b = right; + return strcmp(*a, *b); +} + +static void free_pathnames6(char **files, size_t entries) +{ + size_t i; + for(i = 0; i < entries; i++) + free(files[i]); + free(files); +} + +static __uint128_t ipset6_report_unique_ips(ipset6 *ips, size_t *entries) +{ + __uint128_t unique_ips = ipset6_unique_ips(ips); + if(entries) *entries = ips->entries; + return unique_ips; +} + +/* + * iprange6_run() - execute IPv6 mode + * + * Parameters are the same state that main() has after option parsing: + * mode, print format, header flag, quiet flag, etc. 
+ */ +int iprange6_run(int argc, char **argv, int mode, IPSET_PRINT_CMD print, + int header, int quiet, size_t ipset_reduce_factor, + size_t ipset_reduce_min_accepted) +{ + ipset6 *root = NULL, *root_last = NULL, *ips6 = NULL; + ipset6 *first = NULL, *second = NULL, *second_last = NULL; + int i, read_second = 0, inputs = 0, ret = 0; + char u128buf[40]; + + /* re-scan argv for file arguments and positional operators */ + for(i = 1; i < argc; i++) { + /* skip options that take a value */ + if(i+1 < argc && (!strcmp(argv[i], "as") + || !strcmp(argv[i], "--min-prefix") + || !strcmp(argv[i], "--prefixes") + || !strcmp(argv[i], "--default-prefix") || !strcmp(argv[i], "-p") + || !strcmp(argv[i], "--ipset-reduce") || !strcmp(argv[i], "--reduce-factor") + || !strcmp(argv[i], "--ipset-reduce-entries") || !strcmp(argv[i], "--reduce-entries") + || !strcmp(argv[i], "--print-prefix") + || !strcmp(argv[i], "--print-prefix-ips") + || !strcmp(argv[i], "--print-prefix-nets") + || !strcmp(argv[i], "--print-suffix") + || !strcmp(argv[i], "--print-suffix-ips") + || !strcmp(argv[i], "--print-suffix-nets") + || !strcmp(argv[i], "--dns-threads") + )) { + i++; /* skip value */ + continue; + } + + /* skip known flags */ + if(argv[i][0] == '-' && argv[i][1] != '\0' && strcmp(argv[i], "-")) { + /* handle positional operators */ + if(!strcmp(argv[i], "--exclude-next") || !strcmp(argv[i], "--except") + || !strcmp(argv[i], "--complement-next") || !strcmp(argv[i], "--complement")) { + read_second = 1; + continue; + } + if(!strcmp(argv[i], "--diff") || !strcmp(argv[i], "--diff-next")) { + read_second = 1; + continue; + } + if(!strcmp(argv[i], "--compare-next")) { + read_second = 1; + continue; + } + /* all other flags: skip */ + continue; + } + + /* this is a file argument (or "-" for stdin) */ + inputs++; + + if(!strcmp(argv[i], "-")) { + if(!(ips6 = ipset6_load(NULL))) { + fprintf(stderr, "%s: Cannot load ipset from stdin\n", PROG); + exit(1); + } + } + else if(argv[i][0] == '@') { + const char 
*listname = argv[i] + 1; + struct stat st; + + if(stat(listname, &st) != 0) { + fprintf(stderr, "%s: Cannot access %s: %s\n", PROG, listname, strerror(errno)); + exit(1); + } + + if(S_ISDIR(st.st_mode)) { + DIR *dir; + struct dirent *entry; + char **files = NULL; + size_t files_allocated = 0, files_collected = 0, j; + + dir = opendir(listname); + if(!dir) { + fprintf(stderr, "%s: Cannot open directory: %s - %s\n", PROG, listname, strerror(errno)); + exit(1); + } + + while((entry = readdir(dir))) { + if(!strcmp(entry->d_name, ".") || !strcmp(entry->d_name, "..")) + continue; + + char filepath[FILENAME_MAX + 1]; + snprintf(filepath, FILENAME_MAX, "%s/%s", listname, entry->d_name); + + if(stat(filepath, &st) != 0 || !S_ISREG(st.st_mode)) + continue; + + if(files_collected == files_allocated) { + size_t next_allocated = files_allocated ? files_allocated * 2 : 16; + char **tmp = realloc(files, next_allocated * sizeof(*files)); + if(!tmp) { + closedir(dir); + free_pathnames6(files, files_collected); + fprintf(stderr, "%s: Cannot allocate memory\n", PROG); + exit(1); + } + files = tmp; + files_allocated = next_allocated; + } + + files[files_collected] = strdup(filepath); + if(!files[files_collected]) { + closedir(dir); + free_pathnames6(files, files_collected); + fprintf(stderr, "%s: Cannot allocate memory\n", PROG); + exit(1); + } + files_collected++; + } + closedir(dir); + + if(!files_collected) { + free(files); + fprintf(stderr, "%s: No valid files found in directory: %s\n", PROG, listname); + exit(1); + } + + qsort(files, files_collected, sizeof(*files), compare_pathnames6); + + for(j = 0; j < files_collected; j++) { + if(!(ips6 = ipset6_load(files[j]))) { + fprintf(stderr, "%s: Cannot load file %s\n", PROG, files[j]); + free_pathnames6(files, files_collected); + exit(1); + } + + if(read_second) + ipset6_chain_append_local(&second, &second_last, ips6); + else { + if(!first) first = ips6; + ipset6_chain_append_local(&root, &root_last, ips6); + } + } + 
free_pathnames6(files, files_collected); + continue; + } + else { + /* file list */ + FILE *fp = fopen(listname, "r"); + char line[MAX_LINE + 1]; + int lineid = 0, files_loaded = 0; + + if(!fp) { + fprintf(stderr, "%s: Cannot open file list: %s - %s\n", PROG, listname, strerror(errno)); + exit(1); + } + + while(fgets(line, MAX_LINE, fp)) { + lineid++; + char *s = line; + while(*s == ' ' || *s == '\t') s++; + if(*s == '\n' || *s == '\r' || *s == '\0' || *s == '#' || *s == ';') + continue; + char *end = s + strlen(s) - 1; + while(end > s && (*end == '\n' || *end == '\r' || *end == ' ' || *end == '\t')) + *end-- = '\0'; + + if(!(ips6 = ipset6_load(s))) { + fprintf(stderr, "%s: Cannot load file %s from list %s (line %d)\n", PROG, s, listname, lineid); + fclose(fp); + exit(1); + } + files_loaded = 1; + + if(read_second) + ipset6_chain_append_local(&second, &second_last, ips6); + else { + if(!first) first = ips6; + ipset6_chain_append_local(&root, &root_last, ips6); + } + } + fclose(fp); + + if(!files_loaded) { + fprintf(stderr, "%s: No valid files found in file list: %s\n", PROG, listname); + exit(1); + } + continue; + } + } + else { + if(!(ips6 = ipset6_load(argv[i]))) { + fprintf(stderr, "%s: Cannot load ipset: %s\n", PROG, argv[i]); + exit(1); + } + } + + /* handle 'as NAME' */ + if(i+1 < argc && !strcmp(argv[i+1], "as") && i+2 < argc) { + strncpy(ips6->filename, argv[i+2], FILENAME_MAX); + ips6->filename[FILENAME_MAX] = '\0'; + i += 2; + } + + if(read_second) + ipset6_chain_append_local(&second, &second_last, ips6); + else { + if(!first) first = ips6; + ipset6_chain_append_local(&root, &root_last, ips6); + } + } + + /* if no files given, read from stdin */ + if(!inputs) { + if(!(first = root = ipset6_load(NULL))) { + fprintf(stderr, "%s: Cannot load ipset from stdin\n", PROG); + exit(1); + } + root_last = root; + } + + if(!root) { + fprintf(stderr, "%s: No valid ipsets to process.\n", PROG); + exit(1); + } + + /* --- mode execution (mirrors the IPv4 logic in main()) 
--- */ + + #define MODE_COMBINE 1 + #define MODE_COMPARE 2 + #define MODE_COMPARE_FIRST 3 + #define MODE_COMPARE_NEXT 4 + #define MODE_COUNT_UNIQUE_MERGED 5 + #define MODE_COUNT_UNIQUE_ALL 6 + #define MODE_REDUCE 7 + #define MODE_COMMON 8 + #define MODE_EXCLUDE_NEXT 9 + #define MODE_DIFF 10 + + if(mode == MODE_COMBINE || mode == MODE_REDUCE || mode == MODE_COUNT_UNIQUE_MERGED) { + strcpy(root->filename, "combined ipset"); + + for(ips6 = root->next; ips6; ips6 = ips6->next) + if(unlikely(ipset6_merge(root, ips6))) { + fprintf(stderr, "%s: Cannot merge ipset %s\n", PROG, ips6->filename); + exit(1); + } + + if(mode == MODE_REDUCE) { + fprintf(stderr, "%s: --ipset-reduce is not supported in IPv6 mode\n", PROG); + exit(1); + } + + if(mode == MODE_COMBINE) + ipset6_print(root, print); + else if(mode == MODE_COUNT_UNIQUE_MERGED) { + __uint128_t unique_ips = ipset6_report_unique_ips(root, NULL); + if(unlikely(header)) printf("entries,unique_ips\n"); + printf("%zu,%s\n", root->entries, u128_to_dec(u128buf, sizeof(u128buf), unique_ips)); + } + } + else if(mode == MODE_COMMON) { + ipset6 *common = NULL, *ips2 = NULL; + + if(!root->next) { + fprintf(stderr, "%s: two ipsets at least are needed to find common IPs.\n", PROG); + exit(1); + } + + common = ipset6_common(root, root->next); + for(ips6 = root->next->next; ips6; ips6 = ips6->next) { + ips2 = ipset6_common(common, ips6); + ipset6_free(common); + common = ips2; + } + ipset6_print(common, print); + } + else if(mode == MODE_DIFF) { + if(!root || !second) { + fprintf(stderr, "%s: two ipsets at least are needed to be diffed.\n", PROG); + exit(1); + } + + for(ips6 = root->next; ips6; ips6 = ips6->next) + if(unlikely(ipset6_merge(root, ips6))) { + fprintf(stderr, "%s: Cannot merge ipset %s\n", PROG, ips6->filename); + exit(1); + } + if(root->next) strcpy(root->filename, "ipset A"); + + for(ips6 = second->next; ips6; ips6 = ips6->next) + if(unlikely(ipset6_merge(second, ips6))) { + fprintf(stderr, "%s: Cannot merge ipset %s\n", 
PROG, ips6->filename); + exit(1); + } + if(second->next) strcpy(second->filename, "ipset B"); + + ips6 = ipset6_diff(root, second); + if(!quiet) ipset6_print(ips6, print); + + if(ips6->unique_ips) ret = 1; + else ret = 0; + } + else if(mode == MODE_COMPARE) { + ipset6 *ips2; + + if(!root->next) { + fprintf(stderr, "%s: two ipsets at least are needed to be compared.\n", PROG); + exit(1); + } + + if(unlikely(header)) printf("name1,name2,entries1,entries2,ips1,ips2,combined_ips,common_ips\n"); + + ipset6_optimize_all(root); + + for(ips6 = root; ips6; ips6 = ips6->next) { + for(ips2 = ips6; ips2; ips2 = ips2->next) { + ipset6 *comips; + size_t entries1, entries2; + __uint128_t unique1 = ipset6_report_unique_ips(ips6, &entries1); + __uint128_t unique2 = ipset6_report_unique_ips(ips2, &entries2); + + if(ips6 == ips2) continue; + + comips = ipset6_combine(ips6, ips2); + if(!comips) { + fprintf(stderr, "%s: Cannot merge ipsets\n", PROG); + exit(1); + } + + ipset6_optimize(comips); + printf("%s,%s,%zu,%zu,%s,", ips6->filename, ips2->filename, entries1, entries2, + u128_to_dec(u128buf, sizeof(u128buf), unique1)); + printf("%s,", u128_to_dec(u128buf, sizeof(u128buf), unique2)); + printf("%s,", u128_to_dec(u128buf, sizeof(u128buf), comips->unique_ips)); + printf("%s\n", u128_to_dec(u128buf, sizeof(u128buf), unique1 + unique2 - comips->unique_ips)); + ipset6_free(comips); + } + } + } + else if(mode == MODE_COMPARE_NEXT) { + ipset6 *ips2; + + if(!second) { + fprintf(stderr, "%s: no files given after the --compare-next parameter.\n", PROG); + exit(1); + } + + if(unlikely(header)) printf("name1,name2,entries1,entries2,ips1,ips2,combined_ips,common_ips\n"); + + ipset6_optimize_all(root); + ipset6_optimize_all(second); + + for(ips6 = root; ips6; ips6 = ips6->next) { + for(ips2 = second; ips2; ips2 = ips2->next) { + size_t entries1, entries2; + __uint128_t unique1 = ipset6_report_unique_ips(ips6, &entries1); + __uint128_t unique2 = ipset6_report_unique_ips(ips2, &entries2); + + 
ipset6 *combined = ipset6_combine(ips6, ips2); + if(!combined) { + fprintf(stderr, "%s: Cannot merge ipsets\n", PROG); + exit(1); + } + + ipset6_optimize(combined); + printf("%s,%s,%zu,%zu,%s,", ips6->filename, ips2->filename, entries1, entries2, + u128_to_dec(u128buf, sizeof(u128buf), unique1)); + printf("%s,", u128_to_dec(u128buf, sizeof(u128buf), unique2)); + printf("%s,", u128_to_dec(u128buf, sizeof(u128buf), combined->unique_ips)); + printf("%s\n", u128_to_dec(u128buf, sizeof(u128buf), unique1 + unique2 - combined->unique_ips)); + ipset6_free(combined); + } + } + } + else if(mode == MODE_COMPARE_FIRST) { + if(!root->next) { + fprintf(stderr, "%s: two ipsets at least are needed to be compared.\n", PROG); + exit(1); + } + + if(unlikely(header)) printf("name,entries,unique_ips,common_ips\n"); + + ipset6_optimize_all(root); + + for(ips6 = root; ips6; ips6 = ips6->next) { + size_t entries; + __uint128_t unique_ips = ipset6_report_unique_ips(ips6, &entries); + + if(ips6 == first) continue; + + ipset6 *comips = ipset6_combine(ips6, first); + if(!comips) { + fprintf(stderr, "%s: Cannot merge ipsets\n", PROG); + exit(1); + } + + ipset6_optimize(comips); + printf("%s,%zu,%s,", ips6->filename, entries, + u128_to_dec(u128buf, sizeof(u128buf), unique_ips)); + printf("%s\n", u128_to_dec(u128buf, sizeof(u128buf), unique_ips + first->unique_ips - comips->unique_ips)); + ipset6_free(comips); + } + } + else if(mode == MODE_EXCLUDE_NEXT) { + ipset6 *excluded; + + if(!second) { + fprintf(stderr, "%s: no files given after the --exclude-next parameter.\n", PROG); + exit(1); + } + + for(ips6 = root->next; ips6; ips6 = ips6->next) + if(unlikely(ipset6_merge(root, ips6))) { + fprintf(stderr, "%s: Cannot merge ipset %s\n", PROG, ips6->filename); + exit(1); + } + + excluded = root; + for(ips6 = second; ips6; ips6 = ips6->next) { + ipset6 *tmp = ipset6_exclude(excluded, ips6); + if(!tmp) { + fprintf(stderr, "%s: Cannot exclude IPs\n", PROG); + exit(1); + } + if(excluded != root) 
ipset6_free(excluded); + excluded = tmp; + } + ipset6_print(excluded, print); + } + else if(mode == MODE_COUNT_UNIQUE_ALL) { + if(unlikely(header)) printf("name,entries,unique_ips\n"); + + ipset6_optimize_all(root); + + for(ips6 = root; ips6; ips6 = ips6->next) { + printf("%s,%zu,%s\n", ips6->filename, ips6->entries, + u128_to_dec(u128buf, sizeof(u128buf), ips6->unique_ips)); + } + } + else { + fprintf(stderr, "%s: Unknown mode.\n", PROG); + exit(1); + } + + (void)ipset_reduce_factor; + (void)ipset_reduce_min_accepted; + + return ret; +} diff --git a/src/ipset6.c b/src/ipset6.c new file mode 100644 index 0000000..09157e2 --- /dev/null +++ b/src/ipset6.c @@ -0,0 +1,94 @@ +#include "iprange.h" +#include "iprange6.h" +#include "ipset6.h" + +#define IPSET6_ENTRIES_INCREASE_STEP 1024 + +ipset6 *ipset6_create(const char *filename, size_t entries) { + ipset6 *ips = malloc(sizeof(ipset6)); + if(!ips) return NULL; + + if(entries < IPSET6_ENTRIES_INCREASE_STEP) entries = IPSET6_ENTRIES_INCREASE_STEP; + + if(unlikely(ipset6_entries_allocation_overflows(entries))) { + free(ips); + return NULL; + } + + ips->netaddrs = malloc(entries * sizeof(network_addr6_t)); + if(!ips->netaddrs) { + free(ips); + return NULL; + } + + ips->lines = 0; + ips->entries = 0; + ips->entries_max = entries; + ips->unique_ips = 0; + ips->next = NULL; + ips->prev = NULL; + ips->flags = 0; + + strncpy(ips->filename, (filename && *filename)?filename:"stdin", FILENAME_MAX); + ips->filename[FILENAME_MAX] = '\0'; + + return ips; +} + +void ipset6_free(ipset6 *ips) { + if(ips->next) ips->next->prev = ips->prev; + if(ips->prev) ips->prev->next = ips->next; + + free(ips->netaddrs); + free(ips); +} + +void ipset6_free_all(ipset6 *ips) { + ipset6 *prev, *next; + + if(!ips) return; + + prev = ips->prev; + next = ips->next; + + if(prev) { + prev->next = NULL; + ips->prev = NULL; + ipset6_free_all(prev); + } + + if(next) { + next->prev = NULL; + ips->next = NULL; + ipset6_free_all(next); + } + + free(ips->netaddrs); 
+ free(ips); +} + +void ipset6_grow_internal(ipset6 *ips, size_t free_entries_needed) { + size_t increase; + size_t new_entries_max; + + increase = (free_entries_needed < IPSET6_ENTRIES_INCREASE_STEP)?IPSET6_ENTRIES_INCREASE_STEP:free_entries_needed; + if(unlikely(ipset6_size_add_overflows(ips->entries_max, increase, &new_entries_max) || ipset6_entries_allocation_overflows(new_entries_max))) { + fprintf(stderr, "%s: Cannot grow ipset %s safely beyond %zu entries\n", PROG, ips->filename, ips->entries_max); + exit(1); + } + + ips->entries_max = new_entries_max; + + ips->netaddrs = realloc(ips->netaddrs, ips->entries_max * sizeof(network_addr6_t)); + if(unlikely(!ips->netaddrs)) { + fprintf(stderr, "%s: Cannot re-allocate memory (%zu bytes)\n", PROG, ips->entries_max * sizeof(network_addr6_t)); + exit(1); + } +} + +inline __uint128_t ipset6_unique_ips(ipset6 *ips) { + if(unlikely(!(ips->flags & IPSET_FLAG_OPTIMIZED))) + ipset6_optimize(ips); + + return ips->unique_ips; +} diff --git a/src/ipset6.h b/src/ipset6.h new file mode 100644 index 0000000..5a6e8cd --- /dev/null +++ b/src/ipset6.h @@ -0,0 +1,103 @@ +#ifndef IPRANGE_IPSET6_H +#define IPRANGE_IPSET6_H + +#include "iprange6.h" + +typedef struct ipset6 { + char filename[FILENAME_MAX+1]; + + size_t lines; + size_t entries; + size_t entries_max; + __uint128_t unique_ips; + + uint32_t flags; + + struct ipset6 *next; + struct ipset6 *prev; + + network_addr6_t *netaddrs; +} ipset6; + +extern ipset6 *ipset6_create(const char *filename, size_t entries); +extern void ipset6_free(ipset6 *ips); +extern void ipset6_free_all(ipset6 *ips); + +extern size_t prefix6_counters[129]; + +extern __uint128_t ipset6_unique_ips(ipset6 *ips); + +static inline int ipset6_entries_allocation_overflows(size_t entries) { + return (entries > (SIZE_MAX / sizeof(network_addr6_t))); +} + +static inline int ipset6_size_add_overflows(size_t left, size_t right, size_t *sum) { + if(unlikely(left > (SIZE_MAX - right))) return 1; + *sum = left + right; 
+ return 0; +} + +extern void ipset6_grow_internal(ipset6 *ips, size_t free_entries_needed); + +static inline void ipset6_grow(ipset6 *ips, size_t free_entries_needed) { + if(unlikely(!ips)) return; + + if(unlikely(!free_entries_needed)) + free_entries_needed = 1; + + if(unlikely((ips->entries_max - ips->entries) < free_entries_needed)) + ipset6_grow_internal(ips, free_entries_needed); +} + +static inline void ipset6_added_entry(ipset6 *ips) { + size_t entries = ips->entries; + + ips->lines++; + ips->unique_ips += (__uint128_t)ips->netaddrs[entries].broadcast - (__uint128_t)ips->netaddrs[entries].addr + 1; + + if(likely(ips->flags & IPSET_FLAG_OPTIMIZED && entries > 0)) { + if(unlikely(ips->netaddrs[entries].addr == (ips->netaddrs[entries - 1].broadcast + 1))) { + ips->netaddrs[entries - 1].broadcast = ips->netaddrs[entries].broadcast; + return; + } + + if(likely(ips->netaddrs[entries].addr > ips->netaddrs[entries - 1].broadcast)) { + ips->entries++; + return; + } + + ips->flags &= ~IPSET_FLAG_OPTIMIZED; + } + + ips->entries++; +} + +static inline void ipset6_add_ip_range(ipset6 *ips, ipv6_addr_t from, ipv6_addr_t to) { + ipset6_grow(ips, 1); + + ips->netaddrs[ips->entries].addr = from; + ips->netaddrs[ips->entries].broadcast = to; + ipset6_added_entry(ips); +} + +static inline int ipset6_add_ipstr(ipset6 *ips, char *ipstr) { + int err = 0; + + ipset6_grow(ips, 1); + + ips->netaddrs[ips->entries] = str2netaddr6(ipstr, &err); + if(!err) ipset6_added_entry(ips); + return !err; +} + +/* Forward declarations for IPv6 operations */ +extern void ipset6_optimize(ipset6 *ips); +extern void ipset6_optimize_all(ipset6 *root); +extern int ipset6_merge(ipset6 *to, ipset6 *add); +extern ipset6 *ipset6_common(ipset6 *ips1, ipset6 *ips2); +extern ipset6 *ipset6_exclude(ipset6 *ips1, ipset6 *ips2); +extern ipset6 *ipset6_diff(ipset6 *ips1, ipset6 *ips2); +extern ipset6 *ipset6_combine(ipset6 *ips1, ipset6 *ips2); +extern ipset6 *ipset6_copy(ipset6 *ips1); + +#endif /* 
IPRANGE_IPSET6_H */ diff --git a/src/ipset6_binary.c b/src/ipset6_binary.c new file mode 100644 index 0000000..3bb10ad --- /dev/null +++ b/src/ipset6_binary.c @@ -0,0 +1,289 @@ +#include "iprange.h" +#include "iprange6.h" +#include "ipset6.h" +#include "ipset6_binary.h" + +static uint32_t endianness6 = 0x1A2B3C4D; + +static void binary6_write_failed(void) { + fprintf(stderr, "%s: cannot write binary output: %s\n", PROG, strerror(errno)); + exit(1); +} + +static int binary6_validate_payload(ipset6 *ips, int header_optimized, size_t entries, __uint128_t expected_unique_ips, int *payload_is_optimized) +{ + size_t i; + __uint128_t actual_unique_ips = 0; + + *payload_is_optimized = 1; + + if(!entries) { + if(unlikely(expected_unique_ips != 0)) { + fprintf(stderr, "%s: %s: unique IPs do not match the binary payload\n", PROG, ips->filename); + return 1; + } + return 0; + } + + for(i = 0; i < entries; i++) { + if(unlikely(ips->netaddrs[ips->entries + i].addr > ips->netaddrs[ips->entries + i].broadcast)) { + fprintf(stderr, "%s: %s: invalid binary record %zu has addr > broadcast\n", PROG, ips->filename, i + 1); + return 1; + } + } + + for(i = 1; i < entries; i++) { + network_addr6_t *prev = &ips->netaddrs[ips->entries + i - 1]; + network_addr6_t *curr = &ips->netaddrs[ips->entries + i]; + + if(curr->addr < prev->addr + || curr->addr <= prev->broadcast + || (prev->broadcast != IPV6_ADDR_MAX && curr->addr == (prev->broadcast + 1))) { + *payload_is_optimized = 0; + break; + } + } + + if(*payload_is_optimized) { + for(i = 0; i < entries; i++) { + __uint128_t size = ips->netaddrs[ips->entries + i].broadcast - ips->netaddrs[ips->entries + i].addr + 1; + actual_unique_ips += size; + } + } + else { + /* non-optimized: need to sort and merge to count unique IPs */ + /* for simplicity, we trust the header count for non-optimized v2 payloads */ + /* the data will be re-optimized after loading anyway */ + actual_unique_ips = expected_unique_ips; + } + + if(unlikely(expected_unique_ips 
!= actual_unique_ips)) { + fprintf(stderr, "%s: %s: unique IPs do not match the binary payload\n", PROG, ips->filename); + return 1; + } + + if(unlikely(header_optimized && !*payload_is_optimized)) { + fprintf(stderr, "%s: %s: binary payload claims to be optimized but contains overlapping, adjacent, or unsorted records\n", PROG, ips->filename); + return 1; + } + + return 0; +} + +static int parse_binary6_size_field(ipset6 *ips, const char *field, const char *value, size_t *parsed_value) +{ + char *end = NULL; + unsigned long long parsed; + + if(!value || *value < '0' || *value > '9') { + fprintf(stderr, "%s: %s: invalid %s value '%s'\n", PROG, ips->filename, field, value?value:""); + return 1; + } + + errno = 0; + parsed = strtoull(value, &end, 10); + if(errno || !end || end == value || (*end != '\n' && *end != '\0') || parsed > SIZE_MAX) { + fprintf(stderr, "%s: %s: invalid %s value '%s'\n", PROG, ips->filename, field, value); + return 1; + } + + *parsed_value = (size_t)parsed; + return 0; +} + +static int parse_binary6_u128_field(ipset6 *ips, const char *field, const char *value, __uint128_t *parsed_value) +{ + __uint128_t result = 0; + const char *s = value; + + if(!s || *s < '0' || *s > '9') { + fprintf(stderr, "%s: %s: invalid %s value '%s'\n", PROG, ips->filename, field, s?s:""); + return 1; + } + + while(*s >= '0' && *s <= '9') { + __uint128_t prev = result; + result = result * 10 + (*s - '0'); + if(unlikely(result < prev)) { + fprintf(stderr, "%s: %s: %s value overflow\n", PROG, ips->filename, field); + return 1; + } + s++; + } + + if(*s != '\n' && *s != '\0') { + fprintf(stderr, "%s: %s: invalid %s value '%s'\n", PROG, ips->filename, field, value); + return 1; + } + + *parsed_value = result; + return 0; +} + +int ipset6_load_binary_v20(FILE *fp, ipset6 *ips, int first_line_missing) { + char buffer[MAX_LINE + 1], *s; + size_t entries, bytes, lines, expected_bytes, record_size; + __uint128_t unique_ips; + uint32_t endian; + size_t loaded; + int 
header_optimized = 0; + int payload_is_optimized = 0; + + if(!first_line_missing) { + s = fgets(buffer, MAX_LINE, fp); + buffer[MAX_LINE] = '\0'; + if(!s || strcmp(s, BINARY_HEADER_V20)) { + fprintf(stderr, "%s: %s expecting binary v2 header but found '%s'.\n", PROG, ips->filename, s?s:""); + return 1; + } + } + + /* family line */ + s = fgets(buffer, MAX_LINE, fp); + buffer[MAX_LINE] = '\0'; + if(!s || strcmp(s, "ipv6\n")) { + fprintf(stderr, "%s: %s expected family 'ipv6' but found '%s'.\n", PROG, ips->filename, s?s:""); + return 1; + } + + s = fgets(buffer, MAX_LINE, fp); + buffer[MAX_LINE] = '\0'; + if(!s || (strcmp(s, "optimized\n") && strcmp(s, "non-optimized\n"))) { + fprintf(stderr, "%s: %s expected optimized flag but found '%s'.\n", PROG, ips->filename, s?s:""); + return 1; + } + if(!strcmp(s, "optimized\n")) header_optimized = 1; + + s = fgets(buffer, MAX_LINE, fp); + buffer[MAX_LINE] = '\0'; + if(!s || strncmp(s, "record size ", 12)) { + fprintf(stderr, "%s: %s expected record size but found '%s'.\n", PROG, ips->filename, s?s:""); + return 1; + } + if(parse_binary6_size_field(ips, "record size", &s[12], &record_size)) + return 1; + if(record_size != sizeof(network_addr6_t)) { + fprintf(stderr, "%s: %s: invalid record size %zu (expected %lu)\n", PROG, ips->filename, record_size, (unsigned long)sizeof(network_addr6_t)); + return 1; + } + + s = fgets(buffer, MAX_LINE, fp); + buffer[MAX_LINE] = '\0'; + if(!s || strncmp(s, "records ", 8)) { + fprintf(stderr, "%s: %s expected records count but found '%s'.\n", PROG, ips->filename, s?s:""); + return 1; + } + if(parse_binary6_size_field(ips, "records", &s[8], &entries)) + return 1; + + s = fgets(buffer, MAX_LINE, fp); + buffer[MAX_LINE] = '\0'; + if(!s || strncmp(s, "bytes ", 6)) { + fprintf(stderr, "%s: %s expected bytes count but found '%s'.\n", PROG, ips->filename, s?s:""); + return 1; + } + if(parse_binary6_size_field(ips, "bytes", &s[6], &bytes)) + return 1; + + s = fgets(buffer, MAX_LINE, fp); + 
buffer[MAX_LINE] = '\0'; + if(!s || strncmp(s, "lines ", 6)) { + fprintf(stderr, "%s: %s expected lines count but found '%s'.\n", PROG, ips->filename, s?s:""); + return 1; + } + if(parse_binary6_size_field(ips, "lines", &s[6], &lines)) + return 1; + + s = fgets(buffer, MAX_LINE, fp); + buffer[MAX_LINE] = '\0'; + if(!s || strncmp(s, "unique ips ", 11)) { + fprintf(stderr, "%s: %s expected unique ips but found '%s'.\n", PROG, ips->filename, s?s:""); + return 1; + } + if(parse_binary6_u128_field(ips, "unique ips", &s[11], &unique_ips)) + return 1; + + if(entries > ((SIZE_MAX - sizeof(uint32_t)) / sizeof(network_addr6_t))) { + fprintf(stderr, "%s: %s: invalid number of records (%zu)\n", PROG, ips->filename, entries); + return 1; + } + + if(entries > (SIZE_MAX - ips->entries_max)) { + fprintf(stderr, "%s: %s: too many records to load safely (%zu)\n", PROG, ips->filename, entries); + return 1; + } + + expected_bytes = (sizeof(network_addr6_t) * entries) + sizeof(uint32_t); + if(bytes != expected_bytes) { + fprintf(stderr, "%s: %s invalid number of bytes, found %zu, expected %zu.\n", PROG, ips->filename, bytes, expected_bytes); + return 1; + } + + loaded = fread(&endian, sizeof(uint32_t), 1, fp); + if(loaded != 1) { + fprintf(stderr, "%s: %s: cannot load ipset header\n", PROG, ips->filename); + return 1; + } + + if(endian != endianness6) { + fprintf(stderr, "%s: %s: incompatible endianness\n", PROG, ips->filename); + return 1; + } + + if(lines < entries) { + fprintf(stderr, "%s: %s: lines (%zu) cannot be less than entries (%zu)\n", PROG, ips->filename, lines, entries); + return 1; + } + + ipset6_grow(ips, entries); + + loaded = fread(&ips->netaddrs[ips->entries], sizeof(network_addr6_t), entries, fp); + + if(loaded != entries) { + fprintf(stderr, "%s: %s: expected to load %zu entries, loaded %zu\n", PROG, ips->filename, entries, loaded); + return 1; + } + + if(fread(buffer, 1, 1, fp) != 0) { + fprintf(stderr, "%s: %s: trailing data found after binary payload\n", PROG, 
ips->filename); + return 1; + } + if(ferror(fp)) { + fprintf(stderr, "%s: %s: error while checking for trailing binary data\n", PROG, ips->filename); + return 1; + } + + if(binary6_validate_payload(ips, header_optimized, entries, unique_ips, &payload_is_optimized)) + return 1; + + ips->entries += loaded; + ips->lines += lines; + ips->unique_ips += unique_ips; + ips->flags &= ~IPSET_FLAG_OPTIMIZED; + if(header_optimized && payload_is_optimized) ips->flags |= IPSET_FLAG_OPTIMIZED; + + return 0; +} + +void ipset6_save_binary_v20(ipset6 *ips) { + char u128buf[40]; + + if(!ips->entries) return; + + if(fprintf(stdout, BINARY_HEADER_V20) < 0) binary6_write_failed(); + if(fprintf(stdout, "ipv6\n") < 0) binary6_write_failed(); + if(ips->flags & IPSET_FLAG_OPTIMIZED) { + if(fprintf(stdout, "optimized\n") < 0) binary6_write_failed(); + } + else if(fprintf(stdout, "non-optimized\n") < 0) { + binary6_write_failed(); + } + if(fprintf(stdout, "record size %zu\n", sizeof(network_addr6_t)) < 0) binary6_write_failed(); + if(fprintf(stdout, "records %zu\n", ips->entries) < 0) binary6_write_failed(); + if(fprintf(stdout, "bytes %zu\n", (sizeof(network_addr6_t) * ips->entries) + sizeof(uint32_t)) < 0) binary6_write_failed(); + if(fprintf(stdout, "lines %zu\n", ips->lines) < 0) binary6_write_failed(); + if(fprintf(stdout, "unique ips %s\n", u128_to_dec(u128buf, sizeof(u128buf), ips->unique_ips)) < 0) binary6_write_failed(); + if(fwrite(&endianness6, sizeof(uint32_t), 1, stdout) != 1) binary6_write_failed(); + if(fwrite(ips->netaddrs, sizeof(network_addr6_t), ips->entries, stdout) != ips->entries) binary6_write_failed(); + if(fflush(stdout) != 0) binary6_write_failed(); +} diff --git a/src/ipset6_binary.h b/src/ipset6_binary.h new file mode 100644 index 0000000..ececcbf --- /dev/null +++ b/src/ipset6_binary.h @@ -0,0 +1,11 @@ +#ifndef IPRANGE_IPSET6_BINARY_H +#define IPRANGE_IPSET6_BINARY_H + +#include "ipset6.h" + +#define BINARY_HEADER_V20 "iprange binary format v2.0\n" + +extern int 
ipset6_load_binary_v20(FILE *fp, ipset6 *ips, int first_line_missing); +extern void ipset6_save_binary_v20(ipset6 *ips); + +#endif /* IPRANGE_IPSET6_BINARY_H */ diff --git a/src/ipset6_combine.c b/src/ipset6_combine.c new file mode 100644 index 0000000..e7c1083 --- /dev/null +++ b/src/ipset6_combine.c @@ -0,0 +1,36 @@ +#include "iprange.h" +#include "iprange6.h" +#include "ipset6.h" + +inline ipset6 *ipset6_combine(ipset6 *ips1, ipset6 *ips2) { + ipset6 *ips; + size_t total_entries, total_lines; + + if(unlikely(debug)) fprintf(stderr, "%s: Combining %s and %s (IPv6)\n", PROG, ips1->filename, ips2->filename); + + if(unlikely(ips1->entries > ips1->entries_max || ips2->entries > ips2->entries_max)) { + fprintf(stderr, "%s: Cannot combine ipsets %s and %s because one of them has an invalid internal entry count\n", PROG, ips1->filename, ips2->filename); + return NULL; + } + + if(unlikely(ipset6_size_add_overflows(ips1->entries, ips2->entries, &total_entries) || ipset6_entries_allocation_overflows(total_entries))) { + fprintf(stderr, "%s: Cannot combine ipsets %s and %s safely: too many entries\n", PROG, ips1->filename, ips2->filename); + return NULL; + } + + if(unlikely(ipset6_size_add_overflows(ips1->lines, ips2->lines, &total_lines))) { + fprintf(stderr, "%s: Cannot combine ipsets %s and %s safely: too many input lines\n", PROG, ips1->filename, ips2->filename); + return NULL; + } + + ips = ipset6_create("combined", total_entries); + if(unlikely(!ips)) return NULL; + + memcpy(&ips->netaddrs[0], &ips1->netaddrs[0], ips1->entries * sizeof(network_addr6_t)); + memcpy(&ips->netaddrs[ips1->entries], &ips2->netaddrs[0], ips2->entries * sizeof(network_addr6_t)); + + ips->entries = total_entries; + ips->lines = total_lines; + + return ips; +} diff --git a/src/ipset6_common.c b/src/ipset6_common.c new file mode 100644 index 0000000..d3ba773 --- /dev/null +++ b/src/ipset6_common.c @@ -0,0 +1,80 @@ +#include "iprange.h" +#include "iprange6.h" +#include "ipset6.h" + +inline ipset6 
*ipset6_common(ipset6 *ips1, ipset6 *ips2) { + ipset6 *ips; + unsigned long int n1, n2, i1 = 0, i2 = 0; + ipv6_addr_t lo1, lo2, hi1, hi2, lo, hi; + + if(unlikely(!(ips1->flags & IPSET_FLAG_OPTIMIZED))) + ipset6_optimize(ips1); + + if(unlikely(!(ips2->flags & IPSET_FLAG_OPTIMIZED))) + ipset6_optimize(ips2); + + if(unlikely(debug)) fprintf(stderr, "%s: Finding common IPs in %s and %s (IPv6)\n", PROG, ips1->filename, ips2->filename); + + ips = ipset6_create("common", 0); + if(unlikely(!ips)) return NULL; + + n1 = ips1->entries; + n2 = ips2->entries; + + if(unlikely(n1 == 0 || n2 == 0)) { + ips->lines = ips1->lines + ips2->lines; + ips->flags |= IPSET_FLAG_OPTIMIZED; + return ips; + } + + lo1 = ips1->netaddrs[0].addr; + lo2 = ips2->netaddrs[0].addr; + hi1 = ips1->netaddrs[0].broadcast; + hi2 = ips2->netaddrs[0].broadcast; + + while(i1 < n1 && i2 < n2) { + if(lo1 > hi2) { + i2++; + if(i2 < n2) { + lo2 = ips2->netaddrs[i2].addr; + hi2 = ips2->netaddrs[i2].broadcast; + } + continue; + } + + if(lo2 > hi1) { + i1++; + if(i1 < n1) { + lo1 = ips1->netaddrs[i1].addr; + hi1 = ips1->netaddrs[i1].broadcast; + } + continue; + } + + lo = (lo1 > lo2) ? 
lo1 : lo2; + + if(hi1 < hi2) { + hi = hi1; + i1++; + if(i1 < n1) { + lo1 = ips1->netaddrs[i1].addr; + hi1 = ips1->netaddrs[i1].broadcast; + } + } + else { + hi = hi2; + i2++; + if(i2 < n2) { + lo2 = ips2->netaddrs[i2].addr; + hi2 = ips2->netaddrs[i2].broadcast; + } + } + + ipset6_add_ip_range(ips, lo, hi); + } + + ips->lines = ips1->lines + ips2->lines; + ips->flags |= IPSET_FLAG_OPTIMIZED; + + return ips; +} diff --git a/src/ipset6_copy.c b/src/ipset6_copy.c new file mode 100644 index 0000000..8a32f1b --- /dev/null +++ b/src/ipset6_copy.c @@ -0,0 +1,26 @@ +#include "iprange.h" +#include "iprange6.h" +#include "ipset6.h" + +inline ipset6 *ipset6_copy(ipset6 *ips1) { + ipset6 *ips; + + if(unlikely(debug)) fprintf(stderr, "%s: Copying %s (IPv6)\n", PROG, ips1->filename); + + if(unlikely(ips1->entries > ips1->entries_max)) { + fprintf(stderr, "%s: Cannot copy ipset %s because it has an invalid internal entry count\n", PROG, ips1->filename); + return NULL; + } + + ips = ipset6_create(ips1->filename, ips1->entries); + if(unlikely(!ips)) return NULL; + + memcpy(&ips->netaddrs[0], &ips1->netaddrs[0], ips1->entries * sizeof(network_addr6_t)); + + ips->entries = ips1->entries; + ips->unique_ips = ips1->unique_ips; + ips->lines = ips1->lines; + ips->flags = ips1->flags; + + return ips; +} diff --git a/src/ipset6_diff.c b/src/ipset6_diff.c new file mode 100644 index 0000000..f7ba101 --- /dev/null +++ b/src/ipset6_diff.c @@ -0,0 +1,132 @@ +#include "iprange.h" +#include "iprange6.h" +#include "ipset6.h" + +inline ipset6 *ipset6_diff(ipset6 *ips1, ipset6 *ips2) { + ipset6 *ips; + unsigned long int n1, n2, i1 = 0, i2 = 0; + ipv6_addr_t lo1, lo2, hi1, hi2; + + if(unlikely(!(ips1->flags & IPSET_FLAG_OPTIMIZED))) + ipset6_optimize(ips1); + + if(unlikely(!(ips2->flags & IPSET_FLAG_OPTIMIZED))) + ipset6_optimize(ips2); + + if(unlikely(debug)) fprintf(stderr, "%s: Finding diff IPs in %s and %s (IPv6)\n", PROG, ips1->filename, ips2->filename); + + ips = ipset6_create("diff", 0); + 
if(unlikely(!ips)) return NULL; + + n1 = ips1->entries; + n2 = ips2->entries; + + if(unlikely(n1 == 0 && n2 == 0)) { + ips->lines = ips1->lines + ips2->lines; + ips->flags |= IPSET_FLAG_OPTIMIZED; + return ips; + } + + if(unlikely(n1 == 0)) { + while(i2 < n2) { + ipset6_add_ip_range(ips, ips2->netaddrs[i2].addr, ips2->netaddrs[i2].broadcast); + i2++; + } + ips->lines = ips1->lines + ips2->lines; + ips->flags |= IPSET_FLAG_OPTIMIZED; + return ips; + } + + if(unlikely(n2 == 0)) { + while(i1 < n1) { + ipset6_add_ip_range(ips, ips1->netaddrs[i1].addr, ips1->netaddrs[i1].broadcast); + i1++; + } + ips->lines = ips1->lines + ips2->lines; + ips->flags |= IPSET_FLAG_OPTIMIZED; + return ips; + } + + lo1 = ips1->netaddrs[0].addr; + lo2 = ips2->netaddrs[0].addr; + hi1 = ips1->netaddrs[0].broadcast; + hi2 = ips2->netaddrs[0].broadcast; + + while(i1 < n1 && i2 < n2) { + if(lo1 > hi2) { + ipset6_add_ip_range(ips, lo2, hi2); + i2++; + if(i2 < n2) { + lo2 = ips2->netaddrs[i2].addr; + hi2 = ips2->netaddrs[i2].broadcast; + } + continue; + } + if(lo2 > hi1) { + ipset6_add_ip_range(ips, lo1, hi1); + i1++; + if(i1 < n1) { + lo1 = ips1->netaddrs[i1].addr; + hi1 = ips1->netaddrs[i1].broadcast; + } + continue; + } + + if(lo1 > lo2) + ipset6_add_ip_range(ips, lo2, lo1 - 1); + else if(lo2 > lo1) + ipset6_add_ip_range(ips, lo1, lo2 - 1); + + if(hi1 > hi2) { + lo1 = hi2 + 1; + i2++; + if(i2 < n2) { + lo2 = ips2->netaddrs[i2].addr; + hi2 = ips2->netaddrs[i2].broadcast; + } + continue; + } + else if(hi2 > hi1) { + lo2 = hi1 + 1; + i1++; + if(i1 < n1) { + lo1 = ips1->netaddrs[i1].addr; + hi1 = ips1->netaddrs[i1].broadcast; + } + continue; + } + else { + i1++; + if(i1 < n1) { + lo1 = ips1->netaddrs[i1].addr; + hi1 = ips1->netaddrs[i1].broadcast; + } + i2++; + if(i2 < n2) { + lo2 = ips2->netaddrs[i2].addr; + hi2 = ips2->netaddrs[i2].broadcast; + } + } + } + while(i1 < n1) { + ipset6_add_ip_range(ips, lo1, hi1); + i1++; + if(i1 < n1) { + lo1 = ips1->netaddrs[i1].addr; + hi1 = 
ips1->netaddrs[i1].broadcast; + } + } + while(i2 < n2) { + ipset6_add_ip_range(ips, lo2, hi2); + i2++; + if(i2 < n2) { + lo2 = ips2->netaddrs[i2].addr; + hi2 = ips2->netaddrs[i2].broadcast; + } + } + + ips->lines = ips1->lines + ips2->lines; + ips->flags |= IPSET_FLAG_OPTIMIZED; + + return ips; +} diff --git a/src/ipset6_exclude.c b/src/ipset6_exclude.c new file mode 100644 index 0000000..390e2fb --- /dev/null +++ b/src/ipset6_exclude.c @@ -0,0 +1,111 @@ +#include "iprange.h" +#include "iprange6.h" +#include "ipset6.h" + +inline ipset6 *ipset6_exclude(ipset6 *ips1, ipset6 *ips2) { + ipset6 *ips; + unsigned long int n1, n2, i1 = 0, i2 = 0; + ipv6_addr_t lo1, lo2, hi1, hi2; + + if(unlikely(!(ips1->flags & IPSET_FLAG_OPTIMIZED))) + ipset6_optimize(ips1); + + if(unlikely(!(ips2->flags & IPSET_FLAG_OPTIMIZED))) + ipset6_optimize(ips2); + + if(unlikely(debug)) fprintf(stderr, "%s: Removing IPs in %s from %s (IPv6)\n", PROG, ips2->filename, ips1->filename); + + ips = ipset6_create(ips1->filename, 0); + if(unlikely(!ips)) return NULL; + + n1 = ips1->entries; + n2 = ips2->entries; + + if(unlikely(n1 == 0)) { + ips->lines = ips1->lines + ips2->lines; + ips->flags |= IPSET_FLAG_OPTIMIZED; + return ips; + } + + if(unlikely(n2 == 0)) { + while(i1 < n1) { + ipset6_add_ip_range(ips, ips1->netaddrs[i1].addr, ips1->netaddrs[i1].broadcast); + i1++; + } + ips->lines = ips1->lines + ips2->lines; + ips->flags |= IPSET_FLAG_OPTIMIZED; + return ips; + } + + lo1 = ips1->netaddrs[0].addr; + lo2 = ips2->netaddrs[0].addr; + hi1 = ips1->netaddrs[0].broadcast; + hi2 = ips2->netaddrs[0].broadcast; + + while(i1 < n1 && i2 < n2) { + if(lo1 > hi2) { + i2++; + if(i2 < n2) { + lo2 = ips2->netaddrs[i2].addr; + hi2 = ips2->netaddrs[i2].broadcast; + } + continue; + } + + if(lo2 > hi1) { + ipset6_add_ip_range(ips, lo1, hi1); + i1++; + if(i1 < n1) { + lo1 = ips1->netaddrs[i1].addr; + hi1 = ips1->netaddrs[i1].broadcast; + } + continue; + } + + if(lo1 < lo2) { + ipset6_add_ip_range(ips, lo1, lo2 - 1); + 
lo1 = lo2; + } + + if(hi1 == hi2) { + i1++; + if(i1 < n1) { + lo1 = ips1->netaddrs[i1].addr; + hi1 = ips1->netaddrs[i1].broadcast; + } + i2++; + if(i2 < n2) { + lo2 = ips2->netaddrs[i2].addr; + hi2 = ips2->netaddrs[i2].broadcast; + } + } + else if(hi1 < hi2) { + i1++; + if(i1 < n1) { + lo1 = ips1->netaddrs[i1].addr; + hi1 = ips1->netaddrs[i1].broadcast; + } + } + else { + lo1 = hi2 + 1; + i2++; + if(i2 < n2) { + lo2 = ips2->netaddrs[i2].addr; + hi2 = ips2->netaddrs[i2].broadcast; + } + } + } + + if(i1 < n1) { + ipset6_add_ip_range(ips, lo1, hi1); + i1++; + while(i1 < n1) { + ipset6_add_ip_range(ips, ips1->netaddrs[i1].addr, ips1->netaddrs[i1].broadcast); + i1++; + } + } + + ips->lines = ips1->lines + ips2->lines; + ips->flags |= IPSET_FLAG_OPTIMIZED; + return ips; +} diff --git a/src/ipset6_load.c b/src/ipset6_load.c new file mode 100644 index 0000000..75490f6 --- /dev/null +++ b/src/ipset6_load.c @@ -0,0 +1,603 @@ +#include "iprange.h" +#include "iprange6.h" +#include "ipset6.h" +#include "ipset6_binary.h" +#include "ipset6_load.h" + +#define MAX_INPUT_ELEMENT6 256 + +/* address family for the current invocation */ +extern int active_family; +extern unsigned long ipv6_dropped_in_ipv4_mode; + +/* + * Classify a token as IPv6, IPv4, or hostname. + * Returns: + * 6 = definitely IPv6 (contains ':') + * 4 = looks like IPv4 (digits/dots/slash, no colons) + * 0 = hostname or unknown + */ +static inline int classify_address(const char *token) { + if(strchr(token, ':')) return 6; + if(strchr(token, '.') || strchr(token, '/')) return 4; + /* pure digits could be IPv4 integer or hostname */ + const char *s = token; + int all_digits = 1; + while(*s) { + if(*s < '0' || *s > '9') { all_digits = 0; break; } + s++; + } + if(all_digits && s != token) return 4; + return 0; +} + +/* + * Parse a line that may contain IPv6 addresses. + * Returns the same IPSET_LINE_TYPE enum values as the IPv4 parser. 
+ *
+ * Accepted formats:
+ * - IPv6 address: 2001:db8::1
+ * - IPv6 CIDR: 2001:db8::/32
+ * - IPv6 range: 2001:db8::1 - 2001:db8::ff
+ * - IPv4 address (for normalization to mapped IPv6)
+ * - hostname (for DNS resolution)
+ */
+typedef enum {
+    LINE6_IS_INVALID = -1,
+    LINE6_IS_EMPTY = 0,
+    LINE6_HAS_1_IP = 1,
+    LINE6_HAS_2_IPS = 2,
+    LINE6_HAS_1_HOSTNAME = 3
+} IPSET6_LINE_TYPE;
+
+/* characters that may appear in an IPv6/IPv4 literal or CIDR: hex digits, ':', '.', '/' */
+static inline int is_ipv6_char(char c) {
+    return ((c >= '0' && c <= '9')
+        || (c >= 'a' && c <= 'f')
+        || (c >= 'A' && c <= 'F')
+        || c == ':' || c == '.' || c == '/');
+}
+
+/* characters accepted in a hostname token: alphanumerics, '_', '-', '.' */
+static inline int is_hostname_char6(char c) {
+    return ((c >= '0' && c <= '9')
+        || (c >= 'a' && c <= 'z')
+        || (c >= 'A' && c <= 'Z')
+        || c == '_' || c == '-' || c == '.');
+}
+
+/* true for the characters that end the useful part of a line: comment start, EOL, NUL */
+static inline int is_line_end6(char c) {
+    return (c == '#' || c == ';' || c == '\r' || c == '\n' || c == '\0');
+}
+
+/*
+ * Re-scan the line from its start as a single hostname.
+ * On success the hostname is stored NUL-terminated in ipstr (at most len chars)
+ * and LINE6_HAS_1_HOSTNAME is returned. LINE6_IS_INVALID is returned when the
+ * line has no hostname characters or has anything other than blanks, a comment
+ * or end-of-line after the hostname token.
+ */
+static inline IPSET6_LINE_TYPE parse_hostname6(char *line, char *ipstr, int len) {
+    char *s = line;
+    int i = 0;
+
+    while(*s == ' ' || *s == '\t') s++;
+
+    while(i < len && is_hostname_char6(*s))
+        ipstr[i++] = *s++;
+
+    if(!i) return LINE6_IS_INVALID;
+    ipstr[i] = '\0';
+
+    while(*s == ' ' || *s == '\t') s++;
+    if(is_line_end6(*s)) return LINE6_HAS_1_HOSTNAME;
+
+    return LINE6_IS_INVALID;
+}
+
+/*
+ * Split one input line into its address token(s).
+ *
+ *  line   - NUL-terminated input line
+ *  lineid - line number (currently unused)
+ *  ipstr  - receives the first token (or the hostname); needs len + 1 bytes
+ *  ipstr2 - receives the second token of a range; needs len + 1 bytes
+ *  len    - maximum number of characters stored per token
+ *
+ * Returns:
+ *  LINE6_IS_EMPTY       - blank line or comment ('#' or ';')
+ *  LINE6_HAS_1_IP       - one address/CIDR in ipstr (also returned, after a
+ *                         warning, for "addr -" with nothing after the dash)
+ *  LINE6_HAS_2_IPS      - "first - second" range in ipstr / ipstr2
+ *  LINE6_HAS_1_HOSTNAME - a hostname in ipstr
+ *  LINE6_IS_INVALID     - anything else
+ *
+ * Hostname fallback: the first scan only consumes hex digits, ':', '.' and '/',
+ * so for a hostname it stops in the middle of the name ("a.e" for
+ * "a.example.com", "1e100." for "1e100.net", "a" for "a-b.example.com").
+ * Deciding on that truncated prefix rejected such names, so whenever the first
+ * token contains no ':' (a hostname cannot contain one) and the line does not
+ * parse as an address or range, the whole line is re-scanned as a hostname.
+ * The price is that a mistyped literal made only of hostname characters
+ * (e.g. "10.0.0.1x") is now handed to DNS instead of being flagged here.
+ */
+static inline IPSET6_LINE_TYPE parse_line6(char *line, int lineid, char *ipstr, char *ipstr2, int len) {
+    char *s = line;
+    int i = 0;
+    int has_colon = 0;
+
+    (void)lineid;
+
+    while(*s == ' ' || *s == '\t') s++;
+    if(is_line_end6(*s)) return LINE6_IS_EMPTY;
+
+    /* scan first token: accept IPv6 chars (hex digits, colons, dots, slash) */
+    while(i < len && is_ipv6_char(*s)) {
+        if(*s == ':') has_colon = 1;
+        ipstr[i++] = *s++;
+    }
+
+    /* nothing matched the IPv6 character set: it can only be a hostname */
+    if(!i) return parse_hostname6(line, ipstr, len);
+
+    ipstr[i] = '\0';
+
+    while(*s == ' ' || *s == '\t') s++;
+    if(is_line_end6(*s)) return LINE6_HAS_1_IP;
+
+    /* trailing text that is not a range separator */
+    if(*s != '-')
+        return has_colon ? LINE6_IS_INVALID : parse_hostname6(line, ipstr, len);
+
+    /* skip the dash */
+    s++;
+    while(*s == ' ' || *s == '\t') s++;
+
+    if(is_line_end6(*s)) {
+        fprintf(stderr, "%s: Incomplete range on line, expected an address after -\n", PROG);
+        return LINE6_HAS_1_IP;
+    }
+
+    /* scan second token */
+    i = 0;
+    while(i < len && is_ipv6_char(*s))
+        ipstr2[i++] = *s++;
+
+    if(i) {
+        ipstr2[i] = '\0';
+
+        while(*s == ' ' || *s == '\t') s++;
+        if(is_line_end6(*s)) return LINE6_HAS_2_IPS;
+    }
+
+    /* not a well-formed range: the dash may belong to a hostname */
+    return has_colon ? LINE6_IS_INVALID : parse_hostname6(line, ipstr, len);
+}
+
+/*
+ * Parse an address string in IPv6 mode.
+ * Accepts both IPv6 and IPv4 (normalizing IPv4 to mapped IPv6).
+ * Returns the parsed network_addr6_t.
+ */
+static network_addr6_t parse_address6(char *ipstr, int *err) {
+    network_addr6_t netaddr;
+    int addr_class = classify_address(ipstr);
+
+    if(addr_class == 6) {
+        /* IPv6 literal */
+        return str2netaddr6(ipstr, err);
+    }
+    else if(addr_class == 4) {
+        /* IPv4 literal: normalize to mapped IPv6 */
+        network_addr_t v4 = str2netaddr(ipstr, err);
+        /* NOTE(review): err is dereferenced unguarded here but NULL-checked below;
+         * the caller in this file always passes a valid pointer */
+        if(*err) {
+            netaddr.addr = 0;
+            netaddr.broadcast = 0;
+            return netaddr;
+        }
+
+        /* handle CIDR: if the IPv4 had a prefix, map the range */
+        netaddr.addr = ipv4_to_mapped6(v4.addr);
+        netaddr.broadcast = ipv4_to_mapped6(v4.broadcast);
+        return netaddr;
+    }
+
+    /* unknown format */
+    if(err) (*err)++;
+    fprintf(stderr, "%s: Cannot parse address: %s\n", PROG, ipstr);
+    netaddr.addr = 0;
+    netaddr.broadcast = 0;
+    return netaddr;
+}
+
+/* DNS structures and functions from ipset_load.c */
+extern int dns_threads_max;
+extern int dns_silent;
+extern int dns_progress;
+
+/* IPv6 DNS resolution types */
+
+/* one queued lookup: list link, remaining EAI_AGAIN retries, hostname stored inline */
+typedef struct dnsreq6 {
+    struct dnsreq6 *next;
+    char tries;
+    char hostname[];
+} DNSREQ6;
+
+/* one resolved address waiting to be added to the ipset */
+typedef struct dnsrep6 {
+    ipv6_addr_t ip;
+    struct dnsrep6 *next;
+} DNSREP6;
+
+/*
+ * Queue heads and statistics.
+ * dns6_requests and the request/reply counters are modified under
+ * dns6_requests_mut; dns6_replies is modified under dns6_replies_mut.
+ * dns6_threads is only written by dns6_request_add(), without a lock.
+ */
+static DNSREQ6 *dns6_requests;
+static DNSREP6 *dns6_replies;
+static int dns6_threads;
+static unsigned long dns6_requests_pending;
+static unsigned long dns6_requests_made;
+static unsigned long dns6_requests_finished;
+static unsigned long dns6_requests_retries;
+static unsigned long dns6_replies_found;
+static unsigned long dns6_replies_failed;
+
+static pthread_cond_t dns6_cond = PTHREAD_COND_INITIALIZER;
+static pthread_mutex_t dns6_requests_mut = PTHREAD_MUTEX_INITIALIZER;
+static pthread_mutex_t dns6_replies_mut = PTHREAD_MUTEX_INITIALIZER;
+
+/*
+ * Clear both queue heads and all counters.
+ * The list heads are set to NULL without freeing any nodes still linked,
+ * and dns6_threads is left untouched.
+ */
+static void dns6_reset_stats(void)
+{
+    pthread_mutex_lock(&dns6_requests_mut);
+    dns6_requests = NULL;
+    dns6_requests_pending = 0;
+    dns6_requests_made = 0;
+    dns6_requests_finished = 0;
+    dns6_requests_retries = 0;
+    dns6_replies_found = 0;
+    dns6_replies_failed = 0;
+    pthread_mutex_unlock(&dns6_requests_mut);
+
+    pthread_mutex_lock(&dns6_replies_mut);
+    dns6_replies = NULL;
+    pthread_mutex_unlock(&dns6_replies_mut);
+}
+
+static void *dns6_thread_resolve(void *ptr);
+
+/* wake one worker waiting on dns6_cond */
+static void dns6_signal_threads(void)
+{
+    pthread_mutex_lock(&dns6_requests_mut);
+    pthread_cond_signal(&dns6_cond);
+    pthread_mutex_unlock(&dns6_requests_mut);
+}
+
+/*
+ * Push a request on the queue and start another worker thread when more
+ * requests are pending than threads exist and dns_threads_max is not reached.
+ * Returns 0 on success. Returns -1 (after unlinking and freeing d) only when
+ * a thread cannot be created and no worker exists at all.
+ */
+static int dns6_request_add(DNSREQ6 *d)
+{
+    unsigned long pending;
+
+    pthread_mutex_lock(&dns6_requests_mut);
+    d->next = dns6_requests;
+    dns6_requests = d;
+    dns6_requests_pending++;
+    dns6_requests_made++;
+    pending = dns6_requests_pending;
+    pthread_mutex_unlock(&dns6_requests_mut);
+
+    if(pending > (unsigned long)dns6_threads && dns6_threads < dns_threads_max) {
+        pthread_t thread;
+        if(pthread_create(&thread, NULL, dns6_thread_resolve, NULL)) {
+            fprintf(stderr, "%s: Cannot create DNS thread.\n", PROG);
+            if(dns6_threads == 0) {
+                /* NOTE(review): this unlink assumes d is still the list head;
+                 * presumably safe because no worker exists in this branch */
+                pthread_mutex_lock(&dns6_requests_mut);
+                dns6_requests = d->next;
+                dns6_requests_pending--;
+                dns6_requests_made--;
+                pthread_mutex_unlock(&dns6_requests_mut);
+                free(d);
+                return -1;
+            }
+        }
+        else {
+            dns6_threads++;
+            pthread_detach(thread);
+        }
+    }
+
+    dns6_signal_threads();
+    return 0;
+}
+
+/*
+ * Account for a finished request and free it.
+ * added is the number of addresses produced; zero counts the request as failed.
+ */
+static void dns6_request_done(DNSREQ6 *d, int added)
+{
+    pthread_mutex_lock(&dns6_requests_mut);
+    dns6_requests_pending--;
+    dns6_requests_finished++;
+    if(!added) dns6_replies_failed++;
+    else dns6_replies_found += added;
+    pthread_mutex_unlock(&dns6_requests_mut);
+    free(d);
+}
+
+/*
+ * Handle a getaddrinfo() error.
+ * EAI_AGAIN with retries left: the request is pushed back on the queue (it
+ * stays counted as pending). Any other error, or no retries left: the failure
+ * is reported unless dns_silent is set and the request is finished.
+ */
+static void dns6_request_failed(DNSREQ6 *d, int added, int gai_error)
+{
+    switch(gai_error) {
+        case EAI_AGAIN:
+            if(d->tries > 0) {
+                if(!dns_silent)
+                    fprintf(stderr, "%s: DNS: '%s' will be retried: %s\n", PROG, d->hostname, gai_strerror(gai_error));
+                d->tries--;
+                pthread_mutex_lock(&dns6_requests_mut);
+                d->next = dns6_requests;
+                dns6_requests = d;
+                dns6_requests_retries++;
+                dns6_replies_found += added;
+                pthread_mutex_unlock(&dns6_requests_mut);
+                return;
+            }
+            /* fall through */
+        default:
+            if(!dns_silent)
+                fprintf(stderr, "%s: DNS: '%s' failed: %s\n", PROG, d->hostname, gai_strerror(gai_error));
+            dns6_request_done(d, added);
+            return;
+    }
+}
+
+/*
+ * Pop the next request, blocking on dns6_cond while the queue is empty.
+ * As written the loop only exits with a non-NULL request, so this never
+ * returns NULL.
+ */
+static DNSREQ6 *dns6_request_get(void)
+{
+    DNSREQ6 *ret = NULL;
+
+    while(!ret) {
+        pthread_mutex_lock(&dns6_requests_mut);
+        if(dns6_requests) {
+            ret = dns6_requests;
+            dns6_requests = dns6_requests->next;
+            ret->next = NULL;
+        }
+        pthread_mutex_unlock(&dns6_requests_mut);
+        if(ret) continue;
+
+        /* queue was empty: sleep until something is queued, then retry the pop */
+        pthread_mutex_lock(&dns6_requests_mut);
+        while(!dns6_requests)
+            pthread_cond_wait(&dns6_cond, &dns6_requests_mut);
+        pthread_mutex_unlock(&dns6_requests_mut);
+    }
+
+    return ret;
+}
+
+/*
+ * DNS thread for IPv6 mode: resolves both AAAA and A records.
+ * A records are normalized to IPv4-mapped IPv6 (::ffff:x.x.x.x).
+ */
+static void *dns6_thread_resolve(void *ptr)
+{
+    DNSREQ6 *d;
+    (void)ptr;
+
+    while((d = dns6_request_get())) {
+        int added = 0;
+        int r;
+        struct addrinfo *result, *rp, hints;
+
+        /* resolve both IPv4 and IPv6 */
+        memset(&hints, 0, sizeof(hints));
+        hints.ai_family = AF_UNSPEC;
+        hints.ai_socktype = SOCK_DGRAM;
+
+        r = getaddrinfo(d->hostname, "80", &hints, &result);
+        if(r != 0) {
+            dns6_request_failed(d, 0, r);
+            continue;
+        }
+
+        for(rp = result; rp != NULL; rp = rp->ai_next) {
+            DNSREP6 *p;
+            ipv6_addr_t ip;
+
+            if(rp->ai_family == AF_INET6) {
+                struct sockaddr_in6 *sa6 = (struct sockaddr_in6 *)rp->ai_addr;
+                ip = in6_addr_to_ipv6(&sa6->sin6_addr);
+            }
+            else if(rp->ai_family == AF_INET) {
+                struct sockaddr_in *sa4 = (struct sockaddr_in *)rp->ai_addr;
+                ip = ipv4_to_mapped6(ntohl(sa4->sin_addr.s_addr));
+            }
+            else continue;
+
+            p = malloc(sizeof(DNSREP6));
+            if(!p) {
+                /* this address is dropped; remaining results are still tried */
+                fprintf(stderr, "%s: DNS: out of memory while resolving host '%s'\n", PROG, d->hostname);
+                continue;
+            }
+
+            p->ip = ip;
+            pthread_mutex_lock(&dns6_replies_mut);
+            p->next = dns6_replies;
+            dns6_replies = p;
+            added++;
+            pthread_mutex_unlock(&dns6_replies_mut);
+        }
+
+        freeaddrinfo(result);
+        /* replies are queued before the request is marked done, so a zero
+         * pending count means all its replies are already on dns6_replies */
+        dns6_request_done(d, added);
+    }
+
+    return NULL;
+}
+
+/* move every queued reply into ips as a single-address range, freeing the nodes */
+static void dns6_process_replies(ipset6 *ips)
+{
+    pthread_mutex_lock(&dns6_replies_mut);
+    while(dns6_replies) {
+        DNSREP6 *p;
+        ipset6_add_ip_range(ips, dns6_replies->ip, dns6_replies->ip);
+        p = dns6_replies->next;
+        free(dns6_replies);
+        dns6_replies = p;
+    }
+    pthread_mutex_unlock(&dns6_replies_mut);
+}
+
+/*
+ * Queue a hostname for resolution, after collecting any replies already
+ * available. Each request starts with 20 retries.
+ * Returns 0 on success, -1 when out of memory or when the request could not
+ * be queued.
+ */
+static int dns6_request(ipset6 *ips, char *hostname)
+{
+    DNSREQ6 *d;
+
+    dns6_process_replies(ips);
+
+    d = malloc(sizeof(DNSREQ6) + strlen(hostname) + 1);
+    if(!d) {
+        fprintf(stderr, "%s: out of memory, while trying to resolve '%s'\n", PROG, hostname);
+        return -1;
+    }
+
+    strcpy(d->hostname, hostname);
+    d->tries = 20;
+
+    if(dns6_request_add(d))
+        return -1;
+
+    return 0;
+}
+
+/*
+ * Wait for all queued requests to finish and add their replies to ips.
+ * Polls the pending counter, sleeping one second per round while requests
+ * are outstanding, then resets the statistics. Always returns 0.
+ */
+static int dns6_done(ipset6 *ips)
+{
+    unsigned long pending, made;
+
+    pthread_mutex_lock(&dns6_requests_mut);
+    made = dns6_requests_made;
+    pthread_mutex_unlock(&dns6_requests_mut);
+
+    /* no hostname was requested for this load */
+    if(!made) {
+        dns6_reset_stats();
+        return 0;
+    }
+
+    while(1) {
+        pthread_mutex_lock(&dns6_requests_mut);
+        pending = dns6_requests_pending;
+        pthread_mutex_unlock(&dns6_requests_mut);
+
+        if(!pending) break;
+
+        dns6_process_replies(ips);
+
+        if(pending) {
+            dns6_signal_threads();
+            sleep(1);
+        }
+    }
+    /* pick up replies queued since the last drain */
+    dns6_process_replies(ips);
+
+    dns6_reset_stats();
+    return 0;
+}
+
+/*
+ * ipset6_load()
+ *
+ * Load a file into an IPv6 ipset.
+ * - IPv6 addresses are parsed directly
+ * - IPv4 addresses are normalized to IPv4-mapped IPv6
+ * - Hostnames are resolved for both AAAA and A records
+ *
+ * filename may be NULL or empty, in which case stdin is read and the set is
+ * named "stdin". A first line equal to BINARY_HEADER_V20 switches to the
+ * binary loader; BINARY_HEADER_V10 (IPv4 binary) is rejected.
+ *
+ * Returns the loaded ipset, or NULL when the file cannot be opened, the
+ * binary data cannot be loaded, a hostname cannot be queued, or any text
+ * line failed to parse (all lines are still read and reported first).
+ */
+ipset6 *ipset6_load(const char *filename) {
+    FILE *fp = stdin;
+    int lineid = 0;
+    int parse_errors = 0;
+    char line[MAX_LINE + 1], ipstr[MAX_INPUT_ELEMENT6 + 1], ipstr2[MAX_INPUT_ELEMENT6 + 1];
+    ipset6 *ips = ipset6_create((filename && *filename)?filename:"stdin", 0);
+
+    if(unlikely(!ips)) return NULL;
+
+    if(likely(filename && *filename)) {
+        fp = fopen(filename, "r");
+        if(unlikely(!fp)) {
+            fprintf(stderr, "%s: %s - %s\n", PROG, filename, strerror(errno));
+            ipset6_free(ips);
+            return NULL;
+        }
+    }
+
+    if(unlikely(debug)) fprintf(stderr, "%s: Loading from %s (IPv6 mode)\n", PROG, ips->filename);
+
+    /* NOTE(review): presumably ipset6_add_ip_range() clears this flag when an
+     * added range breaks the ordering - confirm against ipset6.h */
+    ips->flags |= IPSET_FLAG_OPTIMIZED;
+
+    /* empty input: return the empty set */
+    if(!fgets(line, MAX_LINE, fp)) {
+        if(likely(fp != stdin)) fclose(fp);
+        return ips;
+    }
+
+    /* check for binary headers */
+    if(!strcmp(line, BINARY_HEADER_V20)) {
+        /* the header line has already been consumed by the fgets() above */
+        if(ipset6_load_binary_v20(fp, ips, 1)) {
+            /* use ips->filename: filename is NULL or empty when reading stdin */
+            fprintf(stderr, "%s: Cannot load binary v2 %s\n", PROG, ips->filename);
+            ipset6_free(ips);
+            ips = NULL;
+        }
+        if(likely(fp != stdin)) fclose(fp);
+        return ips;
+    }
+
+    /* reject v1.0 binary in IPv6 mode */
+    if(!strcmp(line, BINARY_HEADER_V10)) {
+        fprintf(stderr, "%s: %s: IPv4 binary file cannot be loaded in IPv6 mode\n", PROG, ips->filename);
+        ipset6_free(ips);
+        if(likely(fp != stdin)) fclose(fp);
+        return NULL;
+    }
+
+    do {
+        lineid++;
+
+        switch(parse_line6(line, lineid, ipstr, ipstr2, MAX_INPUT_ELEMENT6)) {
+            case LINE6_IS_INVALID:
+                fprintf(stderr, "%s: Cannot understand line No %d from %s: %s\n", PROG, lineid, ips->filename, line);
+                parse_errors = 1;
+                break;
+
+            case LINE6_IS_EMPTY:
+                break;
+
+            case LINE6_HAS_1_IP:
+                {
+                    int err = 0;
+                    network_addr6_t net = parse_address6(ipstr, &err);
+                    if(unlikely(err)) {
+                        fprintf(stderr, "%s: Cannot understand line No %d from %s: %s\n", PROG, lineid, ips->filename, line);
+                        parse_errors = 1;
+                    }
+                    else {
+                        ipset6_add_ip_range(ips, net.addr, net.broadcast);
+                    }
+                }
+                break;
+
+            case LINE6_HAS_2_IPS:
+                {
+                    int err = 0;
+                    network_addr6_t net1 = parse_address6(ipstr, &err);
+                    network_addr6_t net2;
+                    if(likely(!err)) net2 = parse_address6(ipstr2, &err);
+                    if(unlikely(err)) {
+                        fprintf(stderr, "%s: Cannot understand line No %d from %s: %s\n", PROG, lineid, ips->filename, line);
+                        parse_errors = 1;
+                        /* inside do/while this jumps to the loop condition, i.e. the next line */
+                        continue;
+                    }
+
+                    /* check for mixed-family range endpoints */
+                    int c1 = classify_address(ipstr);
+                    int c2 = classify_address(ipstr2);
+                    if(c1 != c2 && c1 != 0 && c2 != 0) {
+                        fprintf(stderr, "%s: Mixed-family range on line %d: %s - %s\n", PROG, lineid, ipstr, ipstr2);
+                        parse_errors = 1;
+                        continue;
+                    }
+
+                    /* span from the lowest start to the highest end, so reversed
+                     * endpoints and CIDR endpoints are both covered */
+                    ipv6_addr_t lo = (net1.addr < net2.addr) ? net1.addr : net2.addr;
+                    ipv6_addr_t hi = (net1.broadcast > net2.broadcast) ? net1.broadcast : net2.broadcast;
+                    ipset6_add_ip_range(ips, lo, hi);
+                }
+                break;
+
+            case LINE6_HAS_1_HOSTNAME:
+                if(unlikely(debug))
+                    fprintf(stderr, "%s: DNS resolution for hostname '%s' from line %d of file %s (IPv6 mode).\n", PROG, ipstr, lineid, ips->filename);
+
+                if(unlikely(dns6_request(ips, ipstr))) {
+                    if(likely(fp != stdin)) fclose(fp);
+                    dns6_reset_stats();
+                    ipset6_free(ips);
+                    return NULL;
+                }
+                break;
+
+            default:
+                fprintf(stderr, "%s: Cannot understand result code. This is an internal error.\n", PROG);
+                exit(1);
+        }
+    } while(likely(ips && fgets(line, MAX_LINE, fp)));
+
+    if(likely(fp != stdin)) fclose(fp);
+
+    /* wait for outstanding DNS lookups and add their results */
+    if(unlikely(dns6_done(ips))) {
+        ipset6_free(ips);
+        return NULL;
+    }
+
+    if(unlikely(!ips)) return NULL;
+
+    /* any unparsable line invalidates the whole load */
+    if(unlikely(parse_errors)) {
+        ipset6_free(ips);
+        return NULL;
+    }
+
+    return ips;
+}
diff --git a/src/ipset6_load.h b/src/ipset6_load.h
new file mode 100644
index 0000000..7a7178c
--- /dev/null
+++ b/src/ipset6_load.h
@@ -0,0 +1,8 @@
+#ifndef IPRANGE_IPSET6_LOAD_H
+#define IPRANGE_IPSET6_LOAD_H
+
+#include "ipset6.h"
+
+extern ipset6 *ipset6_load(const char *filename);
+
+#endif /* IPRANGE_IPSET6_LOAD_H */
diff --git a/src/ipset6_merge.c b/src/ipset6_merge.c
new file mode 100644
index 0000000..3752b97
--- /dev/null
+++ b/src/ipset6_merge.c
@@ -0,0 +1,33 @@
+#include "iprange.h"
+#include "iprange6.h"
+#include "ipset6.h"
+
+inline int ipset6_merge(ipset6 *to, ipset6 *add) {
+    size_t total_entries, total_lines;
+
+    if(unlikely(debug)) fprintf(stderr, "%s: Merging %s to %s (IPv6)\n", PROG, add->filename, to->filename);
+
+    if(unlikely(to->entries > to->entries_max || add->entries > add->entries_max)) {
+        fprintf(stderr, "%s: Cannot merge ipset %s to %s because one of them has an invalid internal entry count\n", PROG, add->filename, to->filename);
+        return -1;
+    }
+
+    if(unlikely(ipset6_size_add_overflows(to->entries, add->entries, &total_entries) || ipset6_entries_allocation_overflows(total_entries))) {
+        fprintf(stderr, "%s: Cannot merge ipset %s to %s safely: too many entries\n", PROG, add->filename, to->filename);
+        return -1;
+    }
+
+    if(unlikely(ipset6_size_add_overflows(to->lines, add->lines, &total_lines))) {
+        fprintf(stderr, "%s: Cannot merge ipset %s to %s safely: too many input lines\n", PROG, add->filename, to->filename);
+        return -1;
+    }
+
+    ipset6_grow(to, add->entries);
+
+    memcpy(&to->netaddrs[to->entries], &add->netaddrs[0], add->entries * sizeof(network_addr6_t));
+
+    to->entries =
total_entries; + to->lines = total_lines; + to->flags &= ~IPSET_FLAG_OPTIMIZED; + return 0; +} diff --git a/src/ipset6_optimize.c b/src/ipset6_optimize.c new file mode 100644 index 0000000..f11a7d4 --- /dev/null +++ b/src/ipset6_optimize.c @@ -0,0 +1,74 @@ +#include "iprange.h" +#include "iprange6.h" +#include "ipset6.h" + +static int compar_netaddr6(const void *p1, const void *p2) { + const network_addr6_t *na1 = (const network_addr6_t *)p1; + const network_addr6_t *na2 = (const network_addr6_t *)p2; + + if(na1->addr < na2->addr) return -1; + if(na1->addr > na2->addr) return 1; + if(na1->broadcast > na2->broadcast) return -1; + if(na1->broadcast < na2->broadcast) return 1; + return 0; +} + +inline void ipset6_optimize(ipset6 *ips) { + network_addr6_t *naddrs; + size_t i, n = ips->entries, lines = ips->lines; + network_addr6_t *oaddrs = ips->netaddrs; + ipv6_addr_t lo, hi; + + if(unlikely(ips->flags & IPSET_FLAG_OPTIMIZED)) return; + + if(unlikely(debug)) fprintf(stderr, "%s: Optimizing %s (IPv6)\n", PROG, ips->filename); + + if(unlikely(n == 0)) { + ips->flags |= IPSET_FLAG_OPTIMIZED; + ips->unique_ips = 0; + return; + } + + qsort((void *)ips->netaddrs, ips->entries, sizeof(network_addr6_t), compar_netaddr6); + + naddrs = malloc(ips->entries * sizeof(network_addr6_t)); + if(unlikely(!naddrs)) { + fprintf(stderr, "%s: Cannot allocate memory (%zu bytes)\n", PROG, n * sizeof(network_addr6_t)); + exit(1); + } + + ips->netaddrs = naddrs; + ips->entries = 0; + ips->unique_ips = 0; + ips->lines = 0; + + lo = oaddrs[0].addr; + hi = oaddrs[0].broadcast; + for(i = 1; i < n; i++) { + if(oaddrs[i].broadcast <= hi) + continue; + + /* overflow-safe adjacency check: hi + 1 would overflow if hi == max */ + if(oaddrs[i].addr <= hi || (hi != IPV6_ADDR_MAX && oaddrs[i].addr == hi + 1)) { + hi = oaddrs[i].broadcast; + continue; + } + + ipset6_add_ip_range(ips, lo, hi); + + lo = oaddrs[i].addr; + hi = oaddrs[i].broadcast; + } + ipset6_add_ip_range(ips, lo, hi); + ips->lines = lines; + 
+ ips->flags |= IPSET_FLAG_OPTIMIZED; + + free(oaddrs); +} + +inline void ipset6_optimize_all(ipset6 *root) { + ipset6 *ips; + for(ips = root; ips; ips = ips->next) + ipset6_optimize(ips); +} diff --git a/src/ipset6_print.c b/src/ipset6_print.c new file mode 100644 index 0000000..32794b0 --- /dev/null +++ b/src/ipset6_print.c @@ -0,0 +1,206 @@ +#include "iprange.h" +#include "iprange6.h" +#include "ipset6.h" +#include "ipset6_binary.h" +#include "ipset6_print.h" + +uint8_t prefix6_enabled[129] = { + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 0-15 */ + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 16-31 */ + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 32-47 */ + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 48-63 */ + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 64-79 */ + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 80-95 */ + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 96-111 */ + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 112-127 */ + 1 /* 128 */ +}; + +size_t prefix6_counters[129]; + +/* hard cap on -1 output for IPv6 (same concept as IPv4's 256*256*256 cap) */ +#define IPV6_SINGLE_IP_CAP (256ULL * 256 * 256) + +inline void prefix6_update_counters(ipv6_addr_t addr, int prefix) { + (void)addr; + if(likely(prefix >= 0 && prefix <= 128)) + prefix6_counters[prefix]++; +} + +inline void print_addr6(ipv6_addr_t addr, int prefix) { + prefix6_update_counters(addr, prefix); + + char buf[IP6STR_MAX_LEN + 1]; + + if(prefix < 128) + printf("%s%s/%d%s\n", print_prefix_nets, ip6str_r(buf, addr), prefix, print_suffix_nets); + else + printf("%s%s%s\n", print_prefix_ips, ip6str_r(buf, addr), print_suffix_ips); +} + +inline void print_addr6_range(ipv6_addr_t lo, ipv6_addr_t hi) { + char buf[IP6STR_MAX_LEN + 1]; + + if(unlikely(lo > hi)) { + ipv6_addr_t t = hi; + fprintf(stderr, "%s: WARNING: invalid range reversed start=%s", PROG, ip6str_r(buf, lo)); + fprintf(stderr, " end=%s\n", ip6str_r(buf, hi)); + hi = lo; + lo = t; + } + + if(lo == hi) { + printf("%s%s-", print_prefix_ips, ip6str_r(buf, lo)); + printf("%s%s\n", ip6str_r(buf, hi), 
print_suffix_ips); + } + else { + printf("%s%s-", print_prefix_nets, ip6str_r(buf, lo)); + printf("%s%s\n", ip6str_r(buf, hi), print_suffix_nets); + } +} + +inline void print_addr6_single(ipv6_addr_t x) { + char buf[IP6STR_MAX_LEN + 1]; + printf("%s%s%s\n", print_prefix_ips, ip6str_r(buf, x), print_suffix_ips); +} + +/*------------------------------------------------------------*/ +/* Recursively compute network addresses to cover range lo-hi */ +/* for IPv6 (0..128 prefix space) */ +/* Maximum recursion depth is 128. */ +/*------------------------------------------------------------*/ +inline int split_range6(ipv6_addr_t addr, int prefix, ipv6_addr_t lo, ipv6_addr_t hi, void (*print)(ipv6_addr_t, int)) { + ipv6_addr_t bc, lower_half, upper_half; + + if(unlikely(lo > hi)) { + ipv6_addr_t t = hi; + char buf[IP6STR_MAX_LEN + 1]; + fprintf(stderr, "%s: WARNING: invalid range reversed start=%s", PROG, ip6str_r(buf, lo)); + fprintf(stderr, " end=%s\n", ip6str_r(buf, hi)); + hi = lo; + lo = t; + } + + if(unlikely(prefix < 0 || prefix > 128)) { + fprintf(stderr, "%s: Invalid IPv6 prefix %d!\n", PROG, prefix); + return 0; + } + + bc = broadcast6(addr, prefix); + + if(unlikely(lo < addr || hi > bc)) { + char buf[IP6STR_MAX_LEN + 1]; + fprintf(stderr, "%s: Out of range limits for IPv6 network %s/%d\n", PROG, ip6str_r(buf, addr), prefix); + return 0; + } + + if(lo == addr && hi == bc && prefix6_enabled[prefix]) { + print(addr, prefix); + return 1; + } + + prefix++; + lower_half = addr; + upper_half = set_bit6(addr, prefix, 1); + + if(hi < upper_half) + return split_range6(lower_half, prefix, lo, hi, print); + else if(lo >= upper_half) + return split_range6(upper_half, prefix, lo, hi, print); + else + return ( + split_range6(lower_half, prefix, lo, broadcast6(lower_half, prefix), print) + + split_range6(upper_half, prefix, upper_half, hi, print) + ); +} + +void ipset6_print(ipset6 *ips, IPSET_PRINT_CMD print) { + size_t i, n, total = 0; + char u128buf[40]; + + 
if(unlikely(!(ips->flags & IPSET_FLAG_OPTIMIZED))) + ipset6_optimize(ips); + + if(print == PRINT_BINARY) { + ipset6_save_binary_v20(ips); + return; + } + + if(unlikely(debug)) fprintf(stderr, "%s: Printing %s (IPv6) with %zu ranges, %s unique IPs\n", + PROG, ips->filename, ips->entries, u128_to_dec(u128buf, sizeof(u128buf), ips->unique_ips)); + + switch(print) { + case PRINT_CIDR: + for(i = 0; i <= 128; i++) + prefix6_counters[i] = 0; + + n = ips->entries; + for(i = 0; i < n; i++) + total += split_range6((__uint128_t)0, 0, ips->netaddrs[i].addr, ips->netaddrs[i].broadcast, print_addr6); + break; + + case PRINT_SINGLE_IPS: + n = ips->entries; + for(i = 0; i < n; i++) { + ipv6_addr_t start = ips->netaddrs[i].addr; + ipv6_addr_t end = ips->netaddrs[i].broadcast; + ipv6_addr_t x; + + if(unlikely(start > end)) { + char buf[IP6STR_MAX_LEN + 1]; + fprintf(stderr, "%s: WARNING: invalid range reversed start=%s", PROG, ip6str_r(buf, start)); + fprintf(stderr, " end=%s\n", ip6str_r(buf, end)); + x = end; + end = start; + start = x; + } + if(unlikely(end - start > IPV6_SINGLE_IP_CAP)) { + char buf[IP6STR_MAX_LEN + 1]; + fprintf(stderr, "%s: too big range eliminated start=%s", PROG, ip6str_r(buf, start)); + fprintf(stderr, " end=%s\n", ip6str_r(buf, end)); + continue; + } + for(x = start; x >= start && x <= end; x++) { + print_addr6_single(x); + total++; + } + } + break; + + default: + n = ips->entries; + for(i = 0; i < n; i++) { + print_addr6_range(ips->netaddrs[i].addr, ips->netaddrs[i].broadcast); + total++; + } + break; + } + + if(unlikely(debug)) { + int prefixes = 0; + + if(print == PRINT_CIDR) { + fprintf(stderr, "\n%zu printed CIDRs, break down by prefix:\n", total); + total = 0; + for(i = 0; i <= 128; i++) { + if(prefix6_counters[i]) { + fprintf(stderr, " - prefix /%zu counts %zu entries\n", i, prefix6_counters[i]); + total += prefix6_counters[i]; + prefixes++; + } + } + } + else if(print == PRINT_SINGLE_IPS) prefixes = 1; + + { + char *units; + if(print == PRINT_CIDR) 
units = "CIDRs"; + else if(print == PRINT_SINGLE_IPS) units = "IPs"; + else units = "ranges"; + + fprintf(stderr, "\ntotals: %zu lines read, %zu distinct IP ranges found, %d CIDR prefixes, %zu %s printed, %s unique IPs\n", + ips->lines, ips->entries, prefixes, total, units, + u128_to_dec(u128buf, sizeof(u128buf), ips->unique_ips)); + } + } +} diff --git a/src/ipset6_print.h b/src/ipset6_print.h new file mode 100644 index 0000000..0e5b419 --- /dev/null +++ b/src/ipset6_print.h @@ -0,0 +1,17 @@ +#ifndef IPRANGE_IPSET6_PRINT_H +#define IPRANGE_IPSET6_PRINT_H + +#include "ipset6.h" + +extern uint8_t prefix6_enabled[]; + +extern void ipset6_print(ipset6 *ips, IPSET_PRINT_CMD print); + +extern void prefix6_update_counters(ipv6_addr_t addr, int prefix); +extern void print_addr6(ipv6_addr_t addr, int prefix); +extern void print_addr6_range(ipv6_addr_t lo, ipv6_addr_t hi); +extern void print_addr6_single(ipv6_addr_t x); + +extern int split_range6(ipv6_addr_t addr, int prefix, ipv6_addr_t lo, ipv6_addr_t hi, void (*print)(ipv6_addr_t, int)); + +#endif /* IPRANGE_IPSET6_PRINT_H */ diff --git a/tests.d/78-ipv6-basic-merge/cmd.sh b/tests.d/78-ipv6-basic-merge/cmd.sh new file mode 100755 index 0000000..677a8f4 --- /dev/null +++ b/tests.d/78-ipv6-basic-merge/cmd.sh @@ -0,0 +1,4 @@ +#!/bin/bash +# Test basic IPv6 merge (optimize/dedup) + +printf "2001:db8::3\n2001:db8::1\n2001:db8::2\nfe80::1\n2001:db8::1\n" | ../../iprange -6 diff --git a/tests.d/78-ipv6-basic-merge/output b/tests.d/78-ipv6-basic-merge/output new file mode 100644 index 0000000..ccde5c6 --- /dev/null +++ b/tests.d/78-ipv6-basic-merge/output @@ -0,0 +1,3 @@ +2001:db8::1 +2001:db8::2/127 +fe80::1 diff --git a/tests.d/79-ipv6-cidr-decomposition/cmd.sh b/tests.d/79-ipv6-cidr-decomposition/cmd.sh new file mode 100755 index 0000000..e30b454 --- /dev/null +++ b/tests.d/79-ipv6-cidr-decomposition/cmd.sh @@ -0,0 +1,20 @@ +#!/bin/bash +# Test IPv6 CIDR decomposition + +echo "# Full /128:" +echo "2001:db8::1" | ../../iprange 
-6 + +echo "# /126 block:" +echo "2001:db8::/126" | ../../iprange -6 + +echo "# /64 block:" +echo "2001:db8:1::/64" | ../../iprange -6 + +echo "# Range to CIDRs:" +echo "2001:db8::1 - 2001:db8::6" | ../../iprange -6 + +echo "# Compressed notation:" +echo "::1" | ../../iprange -6 + +echo "# Full notation:" +echo "2001:0db8:0000:0000:0000:0000:0000:0001" | ../../iprange -6 diff --git a/tests.d/79-ipv6-cidr-decomposition/output b/tests.d/79-ipv6-cidr-decomposition/output new file mode 100644 index 0000000..ea3fd43 --- /dev/null +++ b/tests.d/79-ipv6-cidr-decomposition/output @@ -0,0 +1,15 @@ +# Full /128: +2001:db8::1 +# /126 block: +2001:db8::/126 +# /64 block: +2001:db8:1::/64 +# Range to CIDRs: +2001:db8::1 +2001:db8::2/127 +2001:db8::4/127 +2001:db8::6 +# Compressed notation: +::1 +# Full notation: +2001:db8::1 diff --git a/tests.d/80-ipv6-set-operations/cmd.sh b/tests.d/80-ipv6-set-operations/cmd.sh new file mode 100755 index 0000000..6ef7149 --- /dev/null +++ b/tests.d/80-ipv6-set-operations/cmd.sh @@ -0,0 +1,22 @@ +#!/bin/bash +# Test IPv6 set operations: common, exclude, diff + +tmpdir=$(mktemp -d) +trap 'rm -rf "$tmpdir"' EXIT + +printf "2001:db8::/32\n" > "$tmpdir/a" +printf "2001:db8:1::/48\n" > "$tmpdir/b" +printf "2001:db8::1\n" > "$tmpdir/c" +printf "2001:db8::2\n" > "$tmpdir/d" + +echo "# Common:" +../../iprange -6 "$tmpdir/a" --common "$tmpdir/b" + +echo "# Exclude (first 3 lines):" +../../iprange -6 "$tmpdir/a" --except "$tmpdir/b" | head -3 + +echo "# Diff (symmetric difference):" +../../iprange -6 "$tmpdir/c" --diff "$tmpdir/d" + +echo "# Exclude empty result:" +../../iprange -6 "$tmpdir/b" --except "$tmpdir/a" diff --git a/tests.d/80-ipv6-set-operations/output b/tests.d/80-ipv6-set-operations/output new file mode 100644 index 0000000..5bf8173 --- /dev/null +++ b/tests.d/80-ipv6-set-operations/output @@ -0,0 +1,10 @@ +# Common: +2001:db8:1::/48 +# Exclude (first 3 lines): +2001:db8::/48 +2001:db8:2::/47 +2001:db8:4::/46 +# Diff (symmetric 
difference): +2001:db8::1 +2001:db8::2 +# Exclude empty result: diff --git a/tests.d/81-ipv6-ipv4-mapped-normalization/cmd.sh b/tests.d/81-ipv6-ipv4-mapped-normalization/cmd.sh new file mode 100755 index 0000000..0504e01 --- /dev/null +++ b/tests.d/81-ipv6-ipv4-mapped-normalization/cmd.sh @@ -0,0 +1,17 @@ +#!/bin/bash +# Test IPv4-to-IPv6 mapped normalization in -6 mode + +echo "# IPv4 address becomes mapped IPv6:" +echo "10.0.0.1" | ../../iprange -6 + +echo "# Explicit mapped IPv6 preserved:" +echo "::ffff:10.0.0.1" | ../../iprange -6 + +echo "# IPv4 and explicit mapped merge:" +printf "10.0.0.1\n::ffff:10.0.0.1\n" | ../../iprange -6 + +echo "# IPv4 CIDR becomes mapped range:" +echo "10.0.0.0/30" | ../../iprange -6 + +echo "# Mixed IPv4 and IPv6 merge:" +printf "2001:db8::1\n10.0.0.1\n" | ../../iprange -6 diff --git a/tests.d/81-ipv6-ipv4-mapped-normalization/output b/tests.d/81-ipv6-ipv4-mapped-normalization/output new file mode 100644 index 0000000..67cb532 --- /dev/null +++ b/tests.d/81-ipv6-ipv4-mapped-normalization/output @@ -0,0 +1,11 @@ +# IPv4 address becomes mapped IPv6: +::ffff:10.0.0.1 +# Explicit mapped IPv6 preserved: +::ffff:10.0.0.1 +# IPv4 and explicit mapped merge: +::ffff:10.0.0.1 +# IPv4 CIDR becomes mapped range: +::ffff:10.0.0.0/126 +# Mixed IPv4 and IPv6 merge: +::ffff:10.0.0.1 +2001:db8::1 diff --git a/tests.d/82-ipv6-binary-roundtrip/cmd.sh b/tests.d/82-ipv6-binary-roundtrip/cmd.sh new file mode 100755 index 0000000..d68a153 --- /dev/null +++ b/tests.d/82-ipv6-binary-roundtrip/cmd.sh @@ -0,0 +1,14 @@ +#!/bin/bash +# Test IPv6 binary save/load roundtrip + +echo "# Single IPv6 roundtrip:" +echo "2001:db8::1" | ../../iprange -6 --print-binary | ../../iprange -6 + +echo "# Multiple IPv6 roundtrip:" +printf "2001:db8::1\n2001:db8::2\nfe80::1\n" | ../../iprange -6 --print-binary | ../../iprange -6 + +echo "# IPv6 binary count roundtrip:" +echo "2001:db8::/32" | ../../iprange -6 --print-binary | ../../iprange -6 -C + +echo "# Mapped IPv4 binary 
roundtrip:" +echo "10.0.0.1" | ../../iprange -6 --print-binary | ../../iprange -6 diff --git a/tests.d/82-ipv6-binary-roundtrip/output b/tests.d/82-ipv6-binary-roundtrip/output new file mode 100644 index 0000000..1be74e2 --- /dev/null +++ b/tests.d/82-ipv6-binary-roundtrip/output @@ -0,0 +1,10 @@ +# Single IPv6 roundtrip: +2001:db8::1 +# Multiple IPv6 roundtrip: +2001:db8::1 +2001:db8::2 +fe80::1 +# IPv6 binary count roundtrip: +1,79228162514264337593543950336 +# Mapped IPv4 binary roundtrip: +::ffff:10.0.0.1 diff --git a/tests.d/83-ipv6-count-compare/cmd.sh b/tests.d/83-ipv6-count-compare/cmd.sh new file mode 100755 index 0000000..7204e24 --- /dev/null +++ b/tests.d/83-ipv6-count-compare/cmd.sh @@ -0,0 +1,23 @@ +#!/bin/bash +# Test IPv6 count and compare modes + +tmpdir=$(mktemp -d) +trap 'rm -rf "$tmpdir"' EXIT + +printf "2001:db8::/48\n" > "$tmpdir/a" +printf "2001:db8:1::/48\n" > "$tmpdir/b" + +echo "# Count unique merged:" +printf "2001:db8::1\n2001:db8::2\n2001:db8::1\n" | ../../iprange -6 -C + +echo "# Count unique all:" +../../iprange -6 --header --count-unique-all "$tmpdir/a" as netA "$tmpdir/b" as netB + +echo "# Compare-next:" +../../iprange -6 --header "$tmpdir/a" as netA --compare-next "$tmpdir/b" as netB + +echo "# /128 count:" +echo "2001:db8::1" | ../../iprange -6 -C + +echo "# /64 count:" +echo "2001:db8:1::/64" | ../../iprange -6 -C diff --git a/tests.d/83-ipv6-count-compare/output b/tests.d/83-ipv6-count-compare/output new file mode 100644 index 0000000..ac3fcdf --- /dev/null +++ b/tests.d/83-ipv6-count-compare/output @@ -0,0 +1,13 @@ +# Count unique merged: +1,2 +# Count unique all: +name,entries,unique_ips +netA,1,1208925819614629174706176 +netB,1,1208925819614629174706176 +# Compare-next: +name1,name2,entries1,entries2,ips1,ips2,combined_ips,common_ips +netA,netB,1,1,1208925819614629174706176,1208925819614629174706176,2417851639229258349412352,0 +# /128 count: +1,1 +# /64 count: +1,18446744073709551616 diff --git 
a/tests.d/84-ipv6-single-ips-cap/cmd.sh b/tests.d/84-ipv6-single-ips-cap/cmd.sh new file mode 100755 index 0000000..5698f3f --- /dev/null +++ b/tests.d/84-ipv6-single-ips-cap/cmd.sh @@ -0,0 +1,11 @@ +#!/bin/bash +# Test IPv6 -1 (single IPs) output and cap behavior + +echo "# Small range single IPs:" +echo "2001:db8::/126" | ../../iprange -6 -1 + +echo "# Single IP output:" +echo "2001:db8::1" | ../../iprange -6 -1 + +echo "# Range output:" +printf "2001:db8::1\n2001:db8::2\n2001:db8::3\n" | ../../iprange -6 -j diff --git a/tests.d/84-ipv6-single-ips-cap/output b/tests.d/84-ipv6-single-ips-cap/output new file mode 100644 index 0000000..39302b9 --- /dev/null +++ b/tests.d/84-ipv6-single-ips-cap/output @@ -0,0 +1,9 @@ +# Small range single IPs: +2001:db8:: +2001:db8::1 +2001:db8::2 +2001:db8::3 +# Single IP output: +2001:db8::1 +# Range output: +2001:db8::1-2001:db8::3 diff --git a/tests.d/85-ipv6-default-mode-is-ipv4/cmd.sh b/tests.d/85-ipv6-default-mode-is-ipv4/cmd.sh new file mode 100755 index 0000000..b8834f5 --- /dev/null +++ b/tests.d/85-ipv6-default-mode-is-ipv4/cmd.sh @@ -0,0 +1,14 @@ +#!/bin/bash +# Test that default mode (no -4 or -6) still behaves as IPv4 + +echo "# Default mode still parses IPv4:" +echo "10.0.0.1" | ../../iprange + +echo "# Explicit -4 works same as default:" +echo "10.0.0.1" | ../../iprange -4 + +echo "# Default mode count:" +echo "10.0.0.0/24" | ../../iprange -C + +echo "# Explicit -4 count:" +echo "10.0.0.0/24" | ../../iprange -4 -C diff --git a/tests.d/85-ipv6-default-mode-is-ipv4/output b/tests.d/85-ipv6-default-mode-is-ipv4/output new file mode 100644 index 0000000..208aa4d --- /dev/null +++ b/tests.d/85-ipv6-default-mode-is-ipv4/output @@ -0,0 +1,8 @@ +# Default mode still parses IPv4: +10.0.0.1 +# Explicit -4 works same as default: +10.0.0.1 +# Default mode count: +1,256 +# Explicit -4 count: +1,256 diff --git a/tests.d/86-ipv6-mixed-family-rejection/cmd.sh b/tests.d/86-ipv6-mixed-family-rejection/cmd.sh new file mode 100755 index 
0000000..3a15bc5 --- /dev/null +++ b/tests.d/86-ipv6-mixed-family-rejection/cmd.sh @@ -0,0 +1,18 @@ +#!/bin/bash +# Test mixed-family range endpoint rejection + +tmpdir=$(mktemp -d) +stderr="$tmpdir/stderr" +trap 'rm -rf "$tmpdir"' EXIT + +echo "# Mixed-family range should be rejected:" +echo "10.0.0.1 - 2001:db8::1" | ../../iprange -6 2>"$stderr" +rc=$? + +if [ $rc -ne 0 ] && grep -q "Mixed-family range" "$stderr"; then + echo "PASS: mixed-family range rejected" +else + echo "FAIL: expected mixed-family rejection" + cat "$stderr" + exit 1 +fi diff --git a/tests.d/86-ipv6-mixed-family-rejection/output b/tests.d/86-ipv6-mixed-family-rejection/output new file mode 100644 index 0000000..a5e202d --- /dev/null +++ b/tests.d/86-ipv6-mixed-family-rejection/output @@ -0,0 +1,2 @@ +# Mixed-family range should be rejected: +PASS: mixed-family range rejected diff --git a/tests.d/87-ipv6-boundary-addresses/cmd.sh b/tests.d/87-ipv6-boundary-addresses/cmd.sh new file mode 100755 index 0000000..96af0a7 --- /dev/null +++ b/tests.d/87-ipv6-boundary-addresses/cmd.sh @@ -0,0 +1,23 @@ +#!/bin/bash +# Test IPv6 boundary addresses + +echo "# All zeros:" +echo "::" | ../../iprange -6 + +echo "# Loopback:" +echo "::1" | ../../iprange -6 + +echo "# All ones (ffff:...:ffff):" +echo "ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff" | ../../iprange -6 + +echo "# Full range:" +echo "::/0" | ../../iprange -6 + +echo "# Link-local:" +echo "fe80::1" | ../../iprange -6 + +echo "# Adjacent at bottom:" +printf "::\n::1\n" | ../../iprange -6 + +echo "# Count /1 (half the IPv6 space):" +echo "::/1" | ../../iprange -6 -C diff --git a/tests.d/87-ipv6-boundary-addresses/output b/tests.d/87-ipv6-boundary-addresses/output new file mode 100644 index 0000000..5a79620 --- /dev/null +++ b/tests.d/87-ipv6-boundary-addresses/output @@ -0,0 +1,14 @@ +# All zeros: +:: +# Loopback: +::1 +# All ones (ffff:...:ffff): +ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff +# Full range: +::/0 +# Link-local: +fe80::1 +# Adjacent at bottom: 
+::/127 +# Count /1 (half the IPv6 space): +1,170141183460469231731687303715884105728 diff --git a/tests.d/88-ipv6-has-ipv6-flag/cmd.sh b/tests.d/88-ipv6-has-ipv6-flag/cmd.sh new file mode 100755 index 0000000..5a75b8f --- /dev/null +++ b/tests.d/88-ipv6-has-ipv6-flag/cmd.sh @@ -0,0 +1,12 @@ +#!/bin/bash +# Test --has-ipv6 feature detection flag + +../../iprange --has-ipv6 2>&1 +rc=$? + +if [ $rc -eq 0 ]; then + echo "PASS: --has-ipv6 exits with 0" +else + echo "FAIL: --has-ipv6 exited with $rc" + exit 1 +fi diff --git a/tests.d/88-ipv6-has-ipv6-flag/output b/tests.d/88-ipv6-has-ipv6-flag/output new file mode 100644 index 0000000..6f0f60c --- /dev/null +++ b/tests.d/88-ipv6-has-ipv6-flag/output @@ -0,0 +1,2 @@ +yes, IPv6 support is present. +PASS: --has-ipv6 exits with 0 From 543993ccaf0bd12d15d0ef11d28a887e21f658a2 Mon Sep 17 00:00:00 2001 From: Costa Tsaousis Date: Fri, 3 Apr 2026 19:03:02 +0300 Subject: [PATCH 3/9] Fix all review findings from 6-agent parallel review Critical fixes: - ipset6_added_entry adjacency check now guards against IPV6_ADDR_MAX overflow (previously "ffff:...:ffff" + "::" merged to "::/0") - str2netaddr6 prefix parsing uses strtol instead of atoi (previously "/abc" silently became /0, expanding input to entire IPv6 space) - IPv4 mode now converts ::ffff:x.x.x.x mapped addresses back to IPv4 - IPv6 lines in IPv4 mode are dropped gracefully with one-warning summary instead of hard failure (ipv6_dropped_in_ipv4_mode) - IPv4 loader detects v2 binary header and produces clear error - CMakeLists.txt updated with all 16 new IPv6 source files High fixes: - configure.ac checks for __uint128_t availability (fails with clear message on platforms that lack it) Medium fixes: - unique_ips saturates at IPV6_ADDR_MAX instead of wrapping to 0 for full address space ranges - --min-prefix and --prefixes now work in IPv6 mode (1..128 range) - --default-prefix skipped in IPv6 mode (always uses /128) - --has-ipv6 added to --help output - MODE_ defines moved to 
iprange.h (shared, no duplication) - Overflow guards added to ipset6_exclude and ipset6_diff for hi+1 at IPV6_ADDR_MAX boundary - unsigned long -> size_t in set operation index variables - UTF-8 BOM stripped from first line in IPv6 loader - unique_ips < entries check added to binary v2 loader Low fixes: - Dead ipset6_chain_append removed from iprange.c - NULL check added to ipset6_free - Debug logging added to ipset6_added_entry Tests: 93 total (77 IPv4 + 16 IPv6), all passing. 5 new regression tests for the critical bugs found. --- CMakeLists.txt | 21 ++++++- configure.ac | 4 ++ src/iprange.c | 25 ++------ src/iprange.h | 12 ++++ src/iprange6.h | 8 ++- src/iprange6_main.c | 63 ++++++++++++++----- src/ipset6.c | 1 + src/ipset6.h | 30 +++++++-- src/ipset6_binary.c | 5 ++ src/ipset6_common.c | 2 +- src/ipset6_diff.c | 18 ++++-- src/ipset6_exclude.c | 15 ++++- src/ipset6_load.c | 4 ++ src/ipset_load.c | 51 +++++++++++++++ tests.d/89-ipv6-adjacency-max-no-wrap/cmd.sh | 9 +++ tests.d/89-ipv6-adjacency-max-no-wrap/output | 5 ++ .../90-ipv6-invalid-prefix-rejected/cmd.sh | 16 +++++ .../90-ipv6-invalid-prefix-rejected/output | 1 + .../91-ipv4-mapped-ipv6-in-ipv4-mode/cmd.sh | 11 ++++ .../91-ipv4-mapped-ipv6-in-ipv4-mode/output | 7 +++ tests.d/92-ipv4-graceful-ipv6-drop/cmd.sh | 25 ++++++++ tests.d/92-ipv4-graceful-ipv6-drop/output | 7 +++ tests.d/93-ipv6-min-prefix-prefixes/cmd.sh | 8 +++ tests.d/93-ipv6-min-prefix-prefixes/output | 11 ++++ 24 files changed, 308 insertions(+), 51 deletions(-) create mode 100755 tests.d/89-ipv6-adjacency-max-no-wrap/cmd.sh create mode 100644 tests.d/89-ipv6-adjacency-max-no-wrap/output create mode 100755 tests.d/90-ipv6-invalid-prefix-rejected/cmd.sh create mode 100644 tests.d/90-ipv6-invalid-prefix-rejected/output create mode 100755 tests.d/91-ipv4-mapped-ipv6-in-ipv4-mode/cmd.sh create mode 100644 tests.d/91-ipv4-mapped-ipv6-in-ipv4-mode/output create mode 100755 tests.d/92-ipv4-graceful-ipv6-drop/cmd.sh create mode 100644 
tests.d/92-ipv4-graceful-ipv6-drop/output create mode 100755 tests.d/93-ipv6-min-prefix-prefixes/cmd.sh create mode 100644 tests.d/93-ipv6-min-prefix-prefixes/output diff --git a/CMakeLists.txt b/CMakeLists.txt index a3256a6..2caed4c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -6,7 +6,26 @@ find_package (Threads) set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wall -Wextra -Wformat-signedness -Werror=format-security") set(SOURCE_FILES - src/iprange.c src/ipset.c src/ipset.h src/iprange.h src/ipset_binary.c src/ipset_binary.h src/ipset_load.c src/ipset_load.h src/ipset_reduce.c src/ipset_print.c src/ipset_print.h src/ipset_optimize.c src/ipset_optimize.h src/ipset_reduce.h src/ipset_diff.c src/ipset_diff.h src/ipset_common.c src/ipset_common.h src/ipset_exclude.c src/ipset_exclude.h src/ipset_merge.c src/ipset_merge.h src/ipset_copy.c src/ipset_copy.h src/ipset_combine.c src/ipset_combine.h) + src/iprange.c src/iprange.h src/iprange6.h src/iprange6_main.c + src/ipset.c src/ipset.h + src/ipset6.c src/ipset6.h + src/ipset6_binary.c src/ipset6_binary.h + src/ipset6_combine.c src/ipset6_common.c src/ipset6_copy.c + src/ipset6_diff.c src/ipset6_exclude.c + src/ipset6_load.c src/ipset6_load.h + src/ipset6_merge.c src/ipset6_optimize.c + src/ipset6_print.c src/ipset6_print.h + src/ipset_binary.c src/ipset_binary.h + src/ipset_combine.c src/ipset_combine.h + src/ipset_common.c src/ipset_common.h + src/ipset_copy.c src/ipset_copy.h + src/ipset_diff.c src/ipset_diff.h + src/ipset_exclude.c src/ipset_exclude.h + src/ipset_load.c src/ipset_load.h + src/ipset_merge.c src/ipset_merge.h + src/ipset_optimize.c src/ipset_optimize.h + src/ipset_print.c src/ipset_print.h + src/ipset_reduce.c src/ipset_reduce.h) include_directories(AFTER ${CMAKE_CURRENT_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/src) add_definitions("-DHAVE_CONFIG_H") diff --git a/configure.ac b/configure.ac index 8660468..cd7a7d6 100644 --- a/configure.ac +++ b/configure.ac @@ -73,6 +73,10 @@ CC="${PTHREAD_CC}" 
AC_TYPE_UINT32_T AC_C_INLINE +AC_CHECK_TYPE([__uint128_t], [], + [AC_MSG_ERROR([Your compiler does not support __uint128_t, required for IPv6 support.])], + [/* no includes needed */]) + test "${with_compare_with_common}" = "yes" && AC_DEFINE([COMPARE_WITH_COMMON], [1], [compare settings]) test "${with_system_ip2str}" = "yes" && AC_DEFINE([SYSTEM_IP2STR], [1], [ip2str settings]) diff --git a/src/iprange.c b/src/iprange.c index c913d2a..35d78ff 100644 --- a/src/iprange.c +++ b/src/iprange.c @@ -284,6 +284,7 @@ static void usage(const char *me) { " --has-reduce\n" " --has-filelist-loading\n" " --has-directory-loading\n" + " --has-ipv6\n" " Exits with 0,\n" " other versions of iprange will exit with 1.\n" " Use this option in scripts to find if this\n" @@ -384,17 +385,6 @@ static void ipset_chain_append(ipset **head, ipset **tail, ipset *ips) *tail = ips; } -static void ipset6_chain_append(ipset6 **head, ipset6 **tail, ipset6 *ips) -{ - ips->next = NULL; - ips->prev = *tail; - - if(*tail) (*tail)->next = ips; - else *head = ips; - - *tail = ips; -} - static int compare_pathnames(const void *left, const void *right) { const char * const *a = left; @@ -448,16 +438,6 @@ static void free_pathnames(char **files, size_t entries) free(files); } -#define MODE_COMBINE 1 -#define MODE_COMPARE 2 -#define MODE_COMPARE_FIRST 3 -#define MODE_COMPARE_NEXT 4 -#define MODE_COUNT_UNIQUE_MERGED 5 -#define MODE_COUNT_UNIQUE_ALL 6 -#define MODE_REDUCE 7 -#define MODE_COMMON 8 -#define MODE_EXCLUDE_NEXT 9 -#define MODE_DIFF 10 /*#define MODE_HISTOGRAM 11 */ int main(int argc, char **argv) { @@ -497,12 +477,14 @@ int main(int argc, char **argv) { } } else if(i+1 < argc && !strcmp(argv[i], "--min-prefix")) { + if(active_family == 6) { i++; continue; } /* handled by iprange6_run */ int j; int min_prefix = (int)parse_long_option_or_die("--min-prefix", argv[++i], 1, 32, "It must be between 1 and 32."); for(j = 0; j < min_prefix; j++) prefix_enabled[j] = 0; } else if(i+1 < argc && 
!strcmp(argv[i], "--prefixes")) { + if(active_family == 6) { i++; continue; } /* handled by iprange6_run */ char *s = NULL, *e = argv[++i]; int j; @@ -530,6 +512,7 @@ int main(int argc, char **argv) { !strcmp(argv[i], "--default-prefix") || !strcmp(argv[i], "-p") )) { + if(active_family == 6) { i++; continue; } /* IPv6 always uses 128 as default */ const char *option = argv[i]; const char *value = argv[++i]; default_prefix = (int)parse_long_option_or_die(option, value, 0, 32, "It must be between 0 and 32."); diff --git a/src/iprange.h b/src/iprange.h index 7241b46..d22399c 100644 --- a/src/iprange.h +++ b/src/iprange.h @@ -181,4 +181,16 @@ static inline char *ip2str_r(char *buf, in_addr_t IP) { #include "ipset_print.h" #include "ipset_reduce.h" +/* operation modes — shared between iprange.c and iprange6_main.c */ +#define MODE_COMBINE 1 +#define MODE_COMPARE 2 +#define MODE_COMPARE_FIRST 3 +#define MODE_COMPARE_NEXT 4 +#define MODE_COUNT_UNIQUE_MERGED 5 +#define MODE_COUNT_UNIQUE_ALL 6 +#define MODE_REDUCE 7 +#define MODE_COMMON 8 +#define MODE_EXCLUDE_NEXT 9 +#define MODE_DIFF 10 + #endif //IPRANGE_IPRANGE_H diff --git a/src/iprange6.h b/src/iprange6.h index 6ed1722..2bf0871 100644 --- a/src/iprange6.h +++ b/src/iprange6.h @@ -111,17 +111,21 @@ static inline network_addr6_t str2netaddr6(char *ipstr, int *err) { ipv6_addr_t addr; if((prefixstr = strchr(ipstr, '/'))) { + char *endptr = NULL; + long parsed_prefix; *prefixstr = '\0'; prefixstr++; errno = 0; - prefix = atoi(prefixstr); - if(unlikely(errno || (*prefixstr == '\0') || prefix < 0 || prefix > 128)) { + parsed_prefix = strtol(prefixstr, &endptr, 10); + if(unlikely(errno || !endptr || endptr == prefixstr || *endptr != '\0' + || parsed_prefix < 0 || parsed_prefix > 128)) { if(err) (*err)++; fprintf(stderr, "%s: Invalid IPv6 prefix /%s\n", PROG, prefixstr); netaddr.addr = 0; netaddr.broadcast = 0; return netaddr; } + prefix = (int)parsed_prefix; } if(!str_to_ipv6(ipstr, &addr)) { diff --git 
a/src/iprange6_main.c b/src/iprange6_main.c index 2776dcf..0484406 100644 --- a/src/iprange6_main.c +++ b/src/iprange6_main.c @@ -64,13 +64,57 @@ int iprange6_run(int argc, char **argv, int mode, IPSET_PRINT_CMD print, int i, read_second = 0, inputs = 0, ret = 0; char u128buf[40]; - /* re-scan argv for file arguments and positional operators */ + /* re-scan argv for file arguments, positional operators, and IPv6-relevant options */ for(i = 1; i < argc; i++) { + /* handle --min-prefix for IPv6 (0..128) */ + if(i+1 < argc && !strcmp(argv[i], "--min-prefix")) { + int j; + char *end = NULL; + long val; + errno = 0; + val = strtol(argv[++i], &end, 10); + if(errno || !end || end == argv[i] || *end != '\0' || val < 1 || val > 128) { + fprintf(stderr, "%s: Invalid value '%s' for --min-prefix. It must be between 1 and 128.\n", PROG, argv[i]); + exit(1); + } + for(j = 0; j < (int)val; j++) + prefix6_enabled[j] = 0; + continue; + } + + /* handle --prefixes for IPv6 (1..128) */ + if(i+1 < argc && !strcmp(argv[i], "--prefixes")) { + char *s = NULL, *e = argv[++i]; + int j; + for(j = 0; j < 128; j++) + prefix6_enabled[j] = 0; + while(e && *e && e != s) { + s = e; + j = (int)strtol(s, &e, 10); + if(j <= 0 || j > 128) { + fprintf(stderr, "%s: Only prefixes from 1 to 128 can be set. 
%d is invalid.\n", PROG, j); + exit(1); + } + prefix6_enabled[j] = 1; + if(*e == ',' || *e == ' ') e++; + } + if(e && *e) { + fprintf(stderr, "%s: Invalid prefix '%s'\n", PROG, e); + exit(1); + } + continue; + } + + /* handle --default-prefix for IPv6 (0..128) */ + if(i+1 < argc && (!strcmp(argv[i], "--default-prefix") || !strcmp(argv[i], "-p"))) { + /* already parsed in main() for IPv4 range (0..32); we just skip the value here + * since the IPv6 parser always uses 128 as default prefix */ + i++; + continue; + } + /* skip options that take a value */ if(i+1 < argc && (!strcmp(argv[i], "as") - || !strcmp(argv[i], "--min-prefix") - || !strcmp(argv[i], "--prefixes") - || !strcmp(argv[i], "--default-prefix") || !strcmp(argv[i], "-p") || !strcmp(argv[i], "--ipset-reduce") || !strcmp(argv[i], "--reduce-factor") || !strcmp(argv[i], "--ipset-reduce-entries") || !strcmp(argv[i], "--reduce-entries") || !strcmp(argv[i], "--print-prefix") @@ -276,17 +320,6 @@ int iprange6_run(int argc, char **argv, int mode, IPSET_PRINT_CMD print, /* --- mode execution (mirrors the IPv4 logic in main()) --- */ - #define MODE_COMBINE 1 - #define MODE_COMPARE 2 - #define MODE_COMPARE_FIRST 3 - #define MODE_COMPARE_NEXT 4 - #define MODE_COUNT_UNIQUE_MERGED 5 - #define MODE_COUNT_UNIQUE_ALL 6 - #define MODE_REDUCE 7 - #define MODE_COMMON 8 - #define MODE_EXCLUDE_NEXT 9 - #define MODE_DIFF 10 - if(mode == MODE_COMBINE || mode == MODE_REDUCE || mode == MODE_COUNT_UNIQUE_MERGED) { strcpy(root->filename, "combined ipset"); diff --git a/src/ipset6.c b/src/ipset6.c index 09157e2..a34833d 100644 --- a/src/ipset6.c +++ b/src/ipset6.c @@ -36,6 +36,7 @@ ipset6 *ipset6_create(const char *filename, size_t entries) { } void ipset6_free(ipset6 *ips) { + if(!ips) return; if(ips->next) ips->next->prev = ips->prev; if(ips->prev) ips->prev->next = ips->next; diff --git a/src/ipset6.h b/src/ipset6.h index 5a6e8cd..aa9c9c5 100644 --- a/src/ipset6.h +++ b/src/ipset6.h @@ -51,22 +51,44 @@ static inline void 
ipset6_grow(ipset6 *ips, size_t free_entries_needed) { static inline void ipset6_added_entry(ipset6 *ips) { size_t entries = ips->entries; + ipv6_addr_t lo = ips->netaddrs[entries].addr; + ipv6_addr_t hi = ips->netaddrs[entries].broadcast; ips->lines++; - ips->unique_ips += (__uint128_t)ips->netaddrs[entries].broadcast - (__uint128_t)ips->netaddrs[entries].addr + 1; + + /* overflow-safe unique_ips: 2^128 doesn't fit in __uint128_t, saturate at max */ + if(lo == 0 && hi == IPV6_ADDR_MAX) + ips->unique_ips = IPV6_ADDR_MAX; + else { + __uint128_t size = hi - lo + 1; + if(ips->unique_ips > IPV6_ADDR_MAX - size) + ips->unique_ips = IPV6_ADDR_MAX; + else + ips->unique_ips += size; + } if(likely(ips->flags & IPSET_FLAG_OPTIMIZED && entries > 0)) { - if(unlikely(ips->netaddrs[entries].addr == (ips->netaddrs[entries - 1].broadcast + 1))) { - ips->netaddrs[entries - 1].broadcast = ips->netaddrs[entries].broadcast; + /* overflow-safe adjacency: broadcast + 1 wraps at IPV6_ADDR_MAX */ + if(unlikely(ips->netaddrs[entries - 1].broadcast != IPV6_ADDR_MAX && + lo == (ips->netaddrs[entries - 1].broadcast + 1))) { + ips->netaddrs[entries - 1].broadcast = hi; return; } - if(likely(ips->netaddrs[entries].addr > ips->netaddrs[entries - 1].broadcast)) { + if(likely(lo > ips->netaddrs[entries - 1].broadcast)) { ips->entries++; return; } ips->flags &= ~IPSET_FLAG_OPTIMIZED; + + if(unlikely(debug)) { + char buf[IP6STR_MAX_LEN + 1]; + fprintf(stderr, "%s: NON-OPTIMIZED %s at line %zu, entry %zu, last was %s - ", PROG, ips->filename, ips->lines, ips->entries, ip6str_r(buf, ips->netaddrs[entries - 1].addr)); + fprintf(stderr, "%s, new is ", ip6str_r(buf, ips->netaddrs[entries - 1].broadcast)); + fprintf(stderr, "%s - ", ip6str_r(buf, lo)); + fprintf(stderr, "%s\n", ip6str_r(buf, hi)); + } } ips->entries++; diff --git a/src/ipset6_binary.c b/src/ipset6_binary.c index 3bb10ad..bc6caa9 100644 --- a/src/ipset6_binary.c +++ b/src/ipset6_binary.c @@ -230,6 +230,11 @@ int ipset6_load_binary_v20(FILE 
*fp, ipset6 *ips, int first_line_missing) { return 1; } + if(unique_ips < entries && unique_ips != 0) { + fprintf(stderr, "%s: %s: unique IPs cannot be less than entries (%zu)\n", PROG, ips->filename, entries); + return 1; + } + if(lines < entries) { fprintf(stderr, "%s: %s: lines (%zu) cannot be less than entries (%zu)\n", PROG, ips->filename, lines, entries); return 1; diff --git a/src/ipset6_common.c b/src/ipset6_common.c index d3ba773..f284144 100644 --- a/src/ipset6_common.c +++ b/src/ipset6_common.c @@ -4,7 +4,7 @@ inline ipset6 *ipset6_common(ipset6 *ips1, ipset6 *ips2) { ipset6 *ips; - unsigned long int n1, n2, i1 = 0, i2 = 0; + size_t n1, n2, i1 = 0, i2 = 0; ipv6_addr_t lo1, lo2, hi1, hi2, lo, hi; if(unlikely(!(ips1->flags & IPSET_FLAG_OPTIMIZED))) diff --git a/src/ipset6_diff.c b/src/ipset6_diff.c index f7ba101..727e012 100644 --- a/src/ipset6_diff.c +++ b/src/ipset6_diff.c @@ -4,7 +4,7 @@ inline ipset6 *ipset6_diff(ipset6 *ips1, ipset6 *ips2) { ipset6 *ips; - unsigned long int n1, n2, i1 = 0, i2 = 0; + size_t n1, n2, i1 = 0, i2 = 0; ipv6_addr_t lo1, lo2, hi1, hi2; if(unlikely(!(ips1->flags & IPSET_FLAG_OPTIMIZED))) @@ -78,21 +78,29 @@ inline ipset6 *ipset6_diff(ipset6 *ips1, ipset6 *ips2) { ipset6_add_ip_range(ips, lo1, lo2 - 1); if(hi1 > hi2) { - lo1 = hi2 + 1; - i2++; + if(hi2 == IPV6_ADDR_MAX) { i1++; i2++; } + else { lo1 = hi2 + 1; i2++; } if(i2 < n2) { lo2 = ips2->netaddrs[i2].addr; hi2 = ips2->netaddrs[i2].broadcast; } + if(i1 < n1 && lo1 > hi1) { + lo1 = ips1->netaddrs[i1].addr; + hi1 = ips1->netaddrs[i1].broadcast; + } continue; } else if(hi2 > hi1) { - lo2 = hi1 + 1; - i1++; + if(hi1 == IPV6_ADDR_MAX) { i1++; i2++; } + else { lo2 = hi1 + 1; i1++; } if(i1 < n1) { lo1 = ips1->netaddrs[i1].addr; hi1 = ips1->netaddrs[i1].broadcast; } + if(i2 < n2 && lo2 > hi2) { + lo2 = ips2->netaddrs[i2].addr; + hi2 = ips2->netaddrs[i2].broadcast; + } continue; } else { diff --git a/src/ipset6_exclude.c b/src/ipset6_exclude.c index 390e2fb..c7bb969 100644 --- 
a/src/ipset6_exclude.c +++ b/src/ipset6_exclude.c @@ -4,7 +4,7 @@ inline ipset6 *ipset6_exclude(ipset6 *ips1, ipset6 *ips2) { ipset6 *ips; - unsigned long int n1, n2, i1 = 0, i2 = 0; + size_t n1, n2, i1 = 0, i2 = 0; ipv6_addr_t lo1, lo2, hi1, hi2; if(unlikely(!(ips1->flags & IPSET_FLAG_OPTIMIZED))) @@ -87,7 +87,18 @@ inline ipset6 *ipset6_exclude(ipset6 *ips1, ipset6 *ips2) { } } else { - lo1 = hi2 + 1; + /* hi2 + 1 would overflow if hi2 == IPV6_ADDR_MAX, but that means + * ips2 covers everything from lo1..max, so nothing remains in ips1 */ + if(hi2 == IPV6_ADDR_MAX) { + i1++; + if(i1 < n1) { + lo1 = ips1->netaddrs[i1].addr; + hi1 = ips1->netaddrs[i1].broadcast; + } + } + else { + lo1 = hi2 + 1; + } i2++; if(i2 < n2) { lo2 = ips2->netaddrs[i2].addr; diff --git a/src/ipset6_load.c b/src/ipset6_load.c index 75490f6..7eb7db1 100644 --- a/src/ipset6_load.c +++ b/src/ipset6_load.c @@ -495,6 +495,10 @@ ipset6 *ipset6_load(const char *filename) { return ips; } + /* strip UTF-8 BOM if present on first line */ + if((unsigned char)line[0] == 0xEF && (unsigned char)line[1] == 0xBB && (unsigned char)line[2] == 0xBF) + memmove(line, line + 3, strlen(line + 3) + 1); + /* check for binary headers */ if(!strcmp(line, BINARY_HEADER_V20)) { if(ipset6_load_binary_v20(fp, ips, 1)) { diff --git a/src/ipset_load.c b/src/ipset_load.c index 65c0d1f..e2f780c 100644 --- a/src/ipset_load.c +++ b/src/ipset_load.c @@ -1,4 +1,9 @@ #include "iprange.h" +#include "iprange6.h" +#include "ipset6_binary.h" + +extern int active_family; +extern unsigned long ipv6_dropped_in_ipv4_mode; /* * the maximum line element to read in input files @@ -760,14 +765,55 @@ ipset *ipset_load(const char *filename) { return ips; } + if(unlikely(!strcmp(line, BINARY_HEADER_V20))) { + fprintf(stderr, "%s: %s: IPv6 binary file cannot be loaded in IPv4 mode (use -6)\n", PROG, ips->filename); + ipset_free(ips); + if(likely(fp != stdin)) fclose(fp); + return NULL; + } + do { lineid++; switch(parse_line(line, lineid, ipstr, 
ipstr2, MAX_INPUT_ELEMENT)) { case LINE_IS_INVALID: + { + /* check if this is an IPv6 line in IPv4 mode: + * must have at least two colons (a:b or ::x) to be plausible IPv6 */ + char *colon = strchr(line, ':'); + char *colon2 = colon ? strchr(colon + 1, ':') : NULL; + if(colon2 && active_family != 6) { + /* try to extract IPv4 from mapped IPv6 (::ffff:x.x.x.x) */ + char *s = line; + while(*s == ' ' || *s == '\t') s++; + if(s[0] == ':' && s[1] == ':' && (s[2] == 'f' || s[2] == 'F') + && (s[3] == 'f' || s[3] == 'F') && (s[4] == 'f' || s[4] == 'F') + && (s[5] == 'f' || s[5] == 'F') && s[6] == ':') { + /* extract the IPv4 part after ::ffff: */ + char v4str[MAX_INPUT_ELEMENT + 1]; + int vi = 0; + char *v4 = s + 7; + while(vi < MAX_INPUT_ELEMENT && ((*v4 >= '0' && *v4 <= '9') || *v4 == '.' || *v4 == '/')) + v4str[vi++] = *v4++; + v4str[vi] = '\0'; + + /* skip trailing whitespace/comment */ + while(*v4 == ' ' || *v4 == '\t') v4++; + if(vi > 0 && (*v4 == '\0' || *v4 == '\n' || *v4 == '\r' || *v4 == '#' || *v4 == ';')) { + if(ipset_add_ipstr(ips, v4str)) + break; /* successfully converted mapped IPv6 to IPv4 */ + } + } + + /* non-mapped IPv6: drop gracefully with counter */ + ipv6_dropped_in_ipv4_mode++; + break; + } + /* cannot read line */ fprintf(stderr, "%s: Cannot understand line No %d from %s: %s\n", PROG, lineid, ips->filename, line); parse_errors = 1; + } break; case LINE_IS_EMPTY: @@ -835,6 +881,11 @@ ipset *ipset_load(const char *filename) { return NULL; } + if(ipv6_dropped_in_ipv4_mode > 0) { + fprintf(stderr, "%s: %s: %lu IPv6 entries dropped (use -6 for IPv6 mode)\n", PROG, ips->filename, ipv6_dropped_in_ipv4_mode); + ipv6_dropped_in_ipv4_mode = 0; + } + if(unlikely(debug)) fprintf(stderr, "%s: Loaded %s %s\n", PROG, (ips->flags & IPSET_FLAG_OPTIMIZED)?"optimized":"non-optimized", ips->filename); /* diff --git a/tests.d/89-ipv6-adjacency-max-no-wrap/cmd.sh b/tests.d/89-ipv6-adjacency-max-no-wrap/cmd.sh new file mode 100755 index 0000000..687410a --- /dev/null 
+++ b/tests.d/89-ipv6-adjacency-max-no-wrap/cmd.sh @@ -0,0 +1,9 @@ +#!/bin/bash +# Regression: ipset6_added_entry adjacency check must not wrap at IPV6_ADDR_MAX +# Previously: "ffff:...:ffff" + "::" merged to "::/0" (catastrophic corruption) + +echo "# Two distinct addresses (max and zero) stay separate:" +printf "ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff\n::\n" | ../../iprange -6 + +echo "# Count should be 2, not 2^128:" +printf "ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff\n::\n" | ../../iprange -6 -C diff --git a/tests.d/89-ipv6-adjacency-max-no-wrap/output b/tests.d/89-ipv6-adjacency-max-no-wrap/output new file mode 100644 index 0000000..700c560 --- /dev/null +++ b/tests.d/89-ipv6-adjacency-max-no-wrap/output @@ -0,0 +1,5 @@ +# Two distinct addresses (max and zero) stay separate: +:: +ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff +# Count should be 2, not 2^128: +2,2 diff --git a/tests.d/90-ipv6-invalid-prefix-rejected/cmd.sh b/tests.d/90-ipv6-invalid-prefix-rejected/cmd.sh new file mode 100755 index 0000000..13c2fa5 --- /dev/null +++ b/tests.d/90-ipv6-invalid-prefix-rejected/cmd.sh @@ -0,0 +1,16 @@ +#!/bin/bash +# Regression: non-numeric CIDR prefixes must be rejected +# Previously: /abc silently became /0, expanding to ::/0 + +tmpdir=$(mktemp -d) +trap 'rm -rf "$tmpdir"' EXIT + +for prefix in "abc" "0FFF" "32abc" "999999999999" ""; do + printf "2001:db8::/$prefix\n" | ../../iprange -6 2>/dev/null + if [ $? 
-eq 0 ]; then + echo "FAIL: /$prefix should have been rejected" + exit 1 + fi +done + +echo "PASS: all invalid prefixes rejected" diff --git a/tests.d/90-ipv6-invalid-prefix-rejected/output b/tests.d/90-ipv6-invalid-prefix-rejected/output new file mode 100644 index 0000000..0d3d16e --- /dev/null +++ b/tests.d/90-ipv6-invalid-prefix-rejected/output @@ -0,0 +1 @@ +PASS: all invalid prefixes rejected diff --git a/tests.d/91-ipv4-mapped-ipv6-in-ipv4-mode/cmd.sh b/tests.d/91-ipv4-mapped-ipv6-in-ipv4-mode/cmd.sh new file mode 100755 index 0000000..9d9f53a --- /dev/null +++ b/tests.d/91-ipv4-mapped-ipv6-in-ipv4-mode/cmd.sh @@ -0,0 +1,11 @@ +#!/bin/bash +# Test: IPv4-mapped IPv6 addresses are converted back to IPv4 in default mode + +echo "# Mapped IPv6 converted to IPv4:" +echo "::ffff:10.0.0.1" | ../../iprange + +echo "# Mapped IPv6 with uppercase F:" +echo "::FFFF:192.168.1.1" | ../../iprange + +echo "# Multiple mapped with regular IPv4:" +printf "::ffff:10.0.0.1\n10.0.0.2\n::ffff:10.0.0.3\n" | ../../iprange diff --git a/tests.d/91-ipv4-mapped-ipv6-in-ipv4-mode/output b/tests.d/91-ipv4-mapped-ipv6-in-ipv4-mode/output new file mode 100644 index 0000000..a9e0dac --- /dev/null +++ b/tests.d/91-ipv4-mapped-ipv6-in-ipv4-mode/output @@ -0,0 +1,7 @@ +# Mapped IPv6 converted to IPv4: +10.0.0.1 +# Mapped IPv6 with uppercase F: +192.168.1.1 +# Multiple mapped with regular IPv4: +10.0.0.1 +10.0.0.2/31 diff --git a/tests.d/92-ipv4-graceful-ipv6-drop/cmd.sh b/tests.d/92-ipv4-graceful-ipv6-drop/cmd.sh new file mode 100755 index 0000000..64c87f9 --- /dev/null +++ b/tests.d/92-ipv4-graceful-ipv6-drop/cmd.sh @@ -0,0 +1,25 @@ +#!/bin/bash +# Test: IPv6 addresses in IPv4 mode are dropped gracefully with one warning + +tmpdir=$(mktemp -d) +stderr="$tmpdir/stderr" +stdout="$tmpdir/stdout" +trap 'rm -rf "$tmpdir"' EXIT + +printf "10.0.0.1\n2001:db8::1\nfe80::1\n10.0.0.2\n" | ../../iprange >"$stdout" 2>"$stderr" +rc=$? 
+ +echo "# Exit code should be 0:" +echo "rc=$rc" + +echo "# Output should contain only IPv4:" +cat "$stdout" + +echo "# Warning should mention dropped count:" +if grep -q "IPv6 entries dropped" "$stderr"; then + echo "PASS: warning printed" +else + echo "FAIL: no warning" + cat "$stderr" + exit 1 +fi diff --git a/tests.d/92-ipv4-graceful-ipv6-drop/output b/tests.d/92-ipv4-graceful-ipv6-drop/output new file mode 100644 index 0000000..4e52125 --- /dev/null +++ b/tests.d/92-ipv4-graceful-ipv6-drop/output @@ -0,0 +1,7 @@ +# Exit code should be 0: +rc=0 +# Output should contain only IPv4: +10.0.0.1 +10.0.0.2 +# Warning should mention dropped count: +PASS: warning printed diff --git a/tests.d/93-ipv6-min-prefix-prefixes/cmd.sh b/tests.d/93-ipv6-min-prefix-prefixes/cmd.sh new file mode 100755 index 0000000..e81f787 --- /dev/null +++ b/tests.d/93-ipv6-min-prefix-prefixes/cmd.sh @@ -0,0 +1,8 @@ +#!/bin/bash +# Test: --min-prefix and --prefixes work in IPv6 mode + +echo "# Min-prefix 126 (only /126, /127, /128):" +echo "2001:db8::/120" | ../../iprange -6 --min-prefix 126 | head -5 + +echo "# Prefixes 128 only (individual IPs):" +echo "2001:db8::/126" | ../../iprange -6 --prefixes 128 | head -5 diff --git a/tests.d/93-ipv6-min-prefix-prefixes/output b/tests.d/93-ipv6-min-prefix-prefixes/output new file mode 100644 index 0000000..becf6cf --- /dev/null +++ b/tests.d/93-ipv6-min-prefix-prefixes/output @@ -0,0 +1,11 @@ +# Min-prefix 126 (only /126, /127, /128): +2001:db8::/126 +2001:db8::4/126 +2001:db8::8/126 +2001:db8::c/126 +2001:db8::10/126 +# Prefixes 128 only (individual IPs): +2001:db8:: +2001:db8::1 +2001:db8::2 +2001:db8::3 From 7be1947769aeb9bda6ffcc08b7a04cd58581bd6c Mon Sep 17 00:00:00 2001 From: Costa Tsaousis Date: Sat, 4 Apr 2026 19:12:40 +0300 Subject: [PATCH 4/9] Extract DNS into separate modules, fix UINT32_MAX overflow, enable -O3/LTO - Extract IPv4 DNS thread pool from ipset_load.c into ipset_dns.c/h - Extract IPv6 DNS thread pool from ipset6_load.c into 
ipset6_dns.c/h - ipset_load.c and ipset6_load.c are now pure text parsers - Fix UINT32_MAX boundary wrap in ipset_added_entry() and ipset_optimize() where broadcast + 1 overflows to 0, incorrectly merging ranges at opposite ends of the address space - Enable -O3 and LTO for GCC builds in configure.ac --- Makefile.am | 6 + configure.ac | 14 ++ src/iprange.h | 1 + src/ipset.h | 3 +- src/ipset6_dns.c | 282 ++++++++++++++++++++++++ src/ipset6_dns.h | 8 + src/ipset6_load.c | 294 +------------------------ src/ipset_dns.c | 371 ++++++++++++++++++++++++++++++++ src/ipset_dns.h | 12 ++ src/ipset_load.c | 495 ------------------------------------------- src/ipset_load.h | 4 - src/ipset_optimize.c | 2 +- 12 files changed, 699 insertions(+), 793 deletions(-) create mode 100644 src/ipset6_dns.c create mode 100644 src/ipset6_dns.h create mode 100644 src/ipset_dns.c create mode 100644 src/ipset_dns.h diff --git a/Makefile.am b/Makefile.am index aa292bb..cd52ab7 100644 --- a/Makefile.am +++ b/Makefile.am @@ -45,6 +45,8 @@ iprange_SOURCES = \ src/ipset6_common.c \ src/ipset6_copy.c \ src/ipset6_diff.c \ + src/ipset6_dns.c \ + src/ipset6_dns.h \ src/ipset6_exclude.c \ src/ipset6_load.c \ src/ipset6_load.h \ @@ -62,6 +64,8 @@ iprange_SOURCES = \ src/ipset_copy.h \ src/ipset_diff.c \ src/ipset_diff.h \ + src/ipset_dns.c \ + src/ipset_dns.h \ src/ipset_exclude.c \ src/ipset_exclude.h \ src/ipset_load.c \ @@ -86,6 +90,7 @@ VPATH_LOCAL_OBJECTS = \ src/ipset6_common.$(OBJEXT) \ src/ipset6_copy.$(OBJEXT) \ src/ipset6_diff.$(OBJEXT) \ + src/ipset6_dns.$(OBJEXT) \ src/ipset6_exclude.$(OBJEXT) \ src/ipset6_load.$(OBJEXT) \ src/ipset6_merge.$(OBJEXT) \ @@ -96,6 +101,7 @@ VPATH_LOCAL_OBJECTS = \ src/ipset_common.$(OBJEXT) \ src/ipset_copy.$(OBJEXT) \ src/ipset_diff.$(OBJEXT) \ + src/ipset_dns.$(OBJEXT) \ src/ipset_exclude.$(OBJEXT) \ src/ipset_load.$(OBJEXT) \ src/ipset_merge.$(OBJEXT) \ diff --git a/configure.ac b/configure.ac index cd7a7d6..fae6ae3 100644 --- a/configure.ac +++ b/configure.ac 
@@ -88,6 +88,20 @@ else AC_DEFINE_UNQUOTED([unlikely(x)], [(x)], [gcc branch optimization]) fi +dnl Use -O3 and LTO for GCC; only whole -O<n>/-Os tokens are rewritten, so -Og/-Ofast/-Oz are left intact +if test "${GCC}" = "yes"; then + CFLAGS=$(echo "${CFLAGS}" | sed -e 's/-O[[0-9s]]* /-O3 /g' -e 's/-O[[0-9s]]*$/-O3/') + case "${CFLAGS}" in + *-O*) ;; + *) CFLAGS="${CFLAGS} -O3" ;; + esac + case "${CFLAGS}" in + *-flto*) ;; + *) CFLAGS="${CFLAGS} -flto" + LDFLAGS="${LDFLAGS} -flto" ;; + esac +fi + if test "${enable_pedantic}" = "yes"; then enable_strict="yes" CFLAGS="${CFLAGS} -pedantic -Wall -Wextra" diff --git a/src/iprange.h b/src/iprange.h index d22399c..21bb99a 100644 --- a/src/iprange.h +++ b/src/iprange.h @@ -175,6 +175,7 @@ static inline char *ip2str_r(char *buf, in_addr_t IP) { #include "ipset_copy.h" #include "ipset_diff.h" #include "ipset_exclude.h" +#include "ipset_dns.h" #include "ipset_load.h" #include "ipset_merge.h" #include "ipset_optimize.h" diff --git a/src/ipset.h b/src/ipset.h index e6984f4..aa1a49c 100644 --- a/src/ipset.h +++ b/src/ipset.h @@ -76,7 +76,8 @@ static inline void ipset_added_entry(ipset *ips) { if(likely(ips->flags & IPSET_FLAG_OPTIMIZED && entries > 0)) { // the new is just next to the last - if(unlikely(ips->netaddrs[entries].addr == (ips->netaddrs[entries - 1].broadcast + 1))) { + if(unlikely(ips->netaddrs[entries - 1].broadcast != UINT32_MAX && + ips->netaddrs[entries].addr == (ips->netaddrs[entries - 1].broadcast + 1))) { ips->netaddrs[entries - 1].broadcast = ips->netaddrs[entries].broadcast; return; } diff --git a/src/ipset6_dns.c b/src/ipset6_dns.c new file mode 100644 index 0000000..47c3b8e --- /dev/null +++ b/src/ipset6_dns.c @@ -0,0 +1,282 @@ +#include "iprange.h" +#include "iprange6.h" +#include "ipset6.h" + +/* ---------------------------------------------------------------------------- + * hostname resolution — IPv6 DNS thread pool + * + * resolves both AAAA and A records; + * A records are normalized to IPv4-mapped IPv6 (::ffff:x.x.x.x) + */ + +extern int dns_threads_max; +extern int dns_silent; + +typedef struct dnsreq6
{ + struct dnsreq6 *next; + char tries; + char hostname[]; +} DNSREQ6; + +typedef struct dnsrep6 { + ipv6_addr_t ip; + struct dnsrep6 *next; +} DNSREP6; + +static DNSREQ6 *dns6_requests; +static DNSREP6 *dns6_replies; +static int dns6_threads; +static unsigned long dns6_requests_pending; +static unsigned long dns6_requests_made; +static unsigned long dns6_requests_finished; +static unsigned long dns6_requests_retries; +static unsigned long dns6_replies_found; +static unsigned long dns6_replies_failed; + +static pthread_cond_t dns6_cond = PTHREAD_COND_INITIALIZER; +static pthread_mutex_t dns6_requests_mut = PTHREAD_MUTEX_INITIALIZER; +static pthread_mutex_t dns6_replies_mut = PTHREAD_MUTEX_INITIALIZER; + +void dns6_reset_stats(void) +{ + pthread_mutex_lock(&dns6_requests_mut); + dns6_requests = NULL; + dns6_requests_pending = 0; + dns6_requests_made = 0; + dns6_requests_finished = 0; + dns6_requests_retries = 0; + dns6_replies_found = 0; + dns6_replies_failed = 0; + pthread_mutex_unlock(&dns6_requests_mut); + + pthread_mutex_lock(&dns6_replies_mut); + dns6_replies = NULL; + pthread_mutex_unlock(&dns6_replies_mut); +} + +static void *dns6_thread_resolve(void *ptr); + +static void dns6_signal_threads(void) +{ + pthread_mutex_lock(&dns6_requests_mut); + pthread_cond_signal(&dns6_cond); + pthread_mutex_unlock(&dns6_requests_mut); +} + +static int dns6_request_add(DNSREQ6 *d) +{ + unsigned long pending; + + pthread_mutex_lock(&dns6_requests_mut); + d->next = dns6_requests; + dns6_requests = d; + dns6_requests_pending++; + dns6_requests_made++; + pending = dns6_requests_pending; + pthread_mutex_unlock(&dns6_requests_mut); + + if(pending > (unsigned long)dns6_threads && dns6_threads < dns_threads_max) { + pthread_t thread; + if(pthread_create(&thread, NULL, dns6_thread_resolve, NULL)) { + fprintf(stderr, "%s: Cannot create DNS thread.\n", PROG); + if(dns6_threads == 0) { + pthread_mutex_lock(&dns6_requests_mut); + dns6_requests = d->next; + dns6_requests_pending--; + 
dns6_requests_made--; + pthread_mutex_unlock(&dns6_requests_mut); + free(d); + return -1; + } + } + else { + dns6_threads++; + pthread_detach(thread); + } + } + + dns6_signal_threads(); + return 0; +} + +static void dns6_request_done(DNSREQ6 *d, int added) +{ + pthread_mutex_lock(&dns6_requests_mut); + dns6_requests_pending--; + dns6_requests_finished++; + if(!added) dns6_replies_failed++; + else dns6_replies_found += added; + pthread_mutex_unlock(&dns6_requests_mut); + free(d); +} + +static void dns6_request_failed(DNSREQ6 *d, int added, int gai_error) +{ + switch(gai_error) { + case EAI_AGAIN: + if(d->tries > 0) { + if(!dns_silent) + fprintf(stderr, "%s: DNS: '%s' will be retried: %s\n", PROG, d->hostname, gai_strerror(gai_error)); + d->tries--; + pthread_mutex_lock(&dns6_requests_mut); + d->next = dns6_requests; + dns6_requests = d; + dns6_requests_retries++; + dns6_replies_found += added; + pthread_mutex_unlock(&dns6_requests_mut); + return; + } + /* fall through */ + default: + if(!dns_silent) + fprintf(stderr, "%s: DNS: '%s' failed: %s\n", PROG, d->hostname, gai_strerror(gai_error)); + dns6_request_done(d, added); + return; + } +} + +static DNSREQ6 *dns6_request_get(void) +{ + DNSREQ6 *ret = NULL; + + while(!ret) { + pthread_mutex_lock(&dns6_requests_mut); + if(dns6_requests) { + ret = dns6_requests; + dns6_requests = dns6_requests->next; + ret->next = NULL; + } + pthread_mutex_unlock(&dns6_requests_mut); + if(ret) continue; + + pthread_mutex_lock(&dns6_requests_mut); + while(!dns6_requests) + pthread_cond_wait(&dns6_cond, &dns6_requests_mut); + pthread_mutex_unlock(&dns6_requests_mut); + } + + return ret; +} + +static void *dns6_thread_resolve(void *ptr) +{ + DNSREQ6 *d; + (void)ptr; + + while((d = dns6_request_get())) { + int added = 0; + int r; + struct addrinfo *result, *rp, hints; + + /* resolve both IPv4 and IPv6 */ + memset(&hints, 0, sizeof(hints)); + hints.ai_family = AF_UNSPEC; + hints.ai_socktype = SOCK_DGRAM; + + r = getaddrinfo(d->hostname, "80", 
&hints, &result); + if(r != 0) { + dns6_request_failed(d, 0, r); + continue; + } + + for(rp = result; rp != NULL; rp = rp->ai_next) { + DNSREP6 *p; + ipv6_addr_t ip; + + if(rp->ai_family == AF_INET6) { + struct sockaddr_in6 *sa6 = (struct sockaddr_in6 *)rp->ai_addr; + ip = in6_addr_to_ipv6(&sa6->sin6_addr); + } + else if(rp->ai_family == AF_INET) { + struct sockaddr_in *sa4 = (struct sockaddr_in *)rp->ai_addr; + ip = ipv4_to_mapped6(ntohl(sa4->sin_addr.s_addr)); + } + else continue; + + p = malloc(sizeof(DNSREP6)); + if(!p) { + fprintf(stderr, "%s: DNS: out of memory while resolving host '%s'\n", PROG, d->hostname); + continue; + } + + p->ip = ip; + pthread_mutex_lock(&dns6_replies_mut); + p->next = dns6_replies; + dns6_replies = p; + added++; + pthread_mutex_unlock(&dns6_replies_mut); + } + + freeaddrinfo(result); + dns6_request_done(d, added); + } + + return NULL; +} + +static void dns6_process_replies(ipset6 *ips) +{ + pthread_mutex_lock(&dns6_replies_mut); + while(dns6_replies) { + DNSREP6 *p; + ipset6_add_ip_range(ips, dns6_replies->ip, dns6_replies->ip); + p = dns6_replies->next; + free(dns6_replies); + dns6_replies = p; + } + pthread_mutex_unlock(&dns6_replies_mut); +} + +int dns6_request(ipset6 *ips, char *hostname) +{ + DNSREQ6 *d; + + dns6_process_replies(ips); + + d = malloc(sizeof(DNSREQ6) + strlen(hostname) + 1); + if(!d) { + fprintf(stderr, "%s: out of memory, while trying to resolve '%s'\n", PROG, hostname); + return -1; + } + + strcpy(d->hostname, hostname); + d->tries = 20; + + if(dns6_request_add(d)) + return -1; + + return 0; +} + +int dns6_done(ipset6 *ips) +{ + unsigned long pending, made; + + pthread_mutex_lock(&dns6_requests_mut); + made = dns6_requests_made; + pthread_mutex_unlock(&dns6_requests_mut); + + if(!made) { + dns6_reset_stats(); + return 0; + } + + while(1) { + pthread_mutex_lock(&dns6_requests_mut); + pending = dns6_requests_pending; + pthread_mutex_unlock(&dns6_requests_mut); + + if(!pending) break; + + dns6_process_replies(ips); 
+ + if(pending) { + dns6_signal_threads(); + sleep(1); + } + } + dns6_process_replies(ips); + + dns6_reset_stats(); + return 0; +} diff --git a/src/ipset6_dns.h b/src/ipset6_dns.h new file mode 100644 index 0000000..3e82176 --- /dev/null +++ b/src/ipset6_dns.h @@ -0,0 +1,8 @@ +#ifndef IPRANGE_IPSET6_DNS_H +#define IPRANGE_IPSET6_DNS_H + +extern int dns6_request(ipset6 *ips, char *hostname); +extern int dns6_done(ipset6 *ips); +extern void dns6_reset_stats(void); + +#endif //IPRANGE_IPSET6_DNS_H diff --git a/src/ipset6_load.c b/src/ipset6_load.c index 7eb7db1..6295ef2 100644 --- a/src/ipset6_load.c +++ b/src/ipset6_load.c @@ -2,6 +2,7 @@ #include "iprange6.h" #include "ipset6.h" #include "ipset6_binary.h" +#include "ipset6_dns.h" #include "ipset6_load.h" #define MAX_INPUT_ELEMENT6 256 @@ -34,13 +35,6 @@ static inline int classify_address(const char *token) { /* * Parse a line that may contain IPv6 addresses. * Returns the same IPSET_LINE_TYPE enum values as the IPv4 parser. - * - * Accepted formats: - * - IPv6 address: 2001:db8::1 - * - IPv6 CIDR: 2001:db8::/32 - * - IPv6 range: 2001:db8::1 - 2001:db8::ff - * - IPv4 address (for normalization to mapped IPv6) - * - hostname (for DNS resolution) */ typedef enum { LINE6_IS_INVALID = -1, @@ -83,7 +77,6 @@ static inline IPSET6_LINE_TYPE parse_line6(char *line, int lineid, char *ipstr, /* if no chars matched in the IPv6 set, try hostname */ if(!i) { - /* try as hostname */ i = 0; s = line; while(*s == ' ' || *s == '\t') s++; @@ -148,18 +141,15 @@ static inline IPSET6_LINE_TYPE parse_line6(char *line, int lineid, char *ipstr, /* * Parse an address string in IPv6 mode. * Accepts both IPv6 and IPv4 (normalizing IPv4 to mapped IPv6). - * Returns the parsed network_addr6_t. 
*/ static network_addr6_t parse_address6(char *ipstr, int *err) { network_addr6_t netaddr; int addr_class = classify_address(ipstr); if(addr_class == 6) { - /* IPv6 literal */ return str2netaddr6(ipstr, err); } else if(addr_class == 4) { - /* IPv4 literal: normalize to mapped IPv6 */ network_addr_t v4 = str2netaddr(ipstr, err); if(*err) { netaddr.addr = 0; @@ -167,13 +157,11 @@ static network_addr6_t parse_address6(char *ipstr, int *err) { return netaddr; } - /* handle CIDR: if the IPv4 had a prefix, map the range */ netaddr.addr = ipv4_to_mapped6(v4.addr); netaddr.broadcast = ipv4_to_mapped6(v4.broadcast); return netaddr; } - /* unknown format */ if(err) (*err)++; fprintf(stderr, "%s: Cannot parse address: %s\n", PROG, ipstr); netaddr.addr = 0; @@ -181,286 +169,8 @@ static network_addr6_t parse_address6(char *ipstr, int *err) { return netaddr; } -/* DNS structures and functions from ipset_load.c */ -extern int dns_threads_max; -extern int dns_silent; -extern int dns_progress; - -/* IPv6 DNS resolution types */ -typedef struct dnsreq6 { - struct dnsreq6 *next; - char tries; - char hostname[]; -} DNSREQ6; - -typedef struct dnsrep6 { - ipv6_addr_t ip; - struct dnsrep6 *next; -} DNSREP6; - -static DNSREQ6 *dns6_requests; -static DNSREP6 *dns6_replies; -static int dns6_threads; -static unsigned long dns6_requests_pending; -static unsigned long dns6_requests_made; -static unsigned long dns6_requests_finished; -static unsigned long dns6_requests_retries; -static unsigned long dns6_replies_found; -static unsigned long dns6_replies_failed; - -static pthread_cond_t dns6_cond = PTHREAD_COND_INITIALIZER; -static pthread_mutex_t dns6_requests_mut = PTHREAD_MUTEX_INITIALIZER; -static pthread_mutex_t dns6_replies_mut = PTHREAD_MUTEX_INITIALIZER; - -static void dns6_reset_stats(void) -{ - pthread_mutex_lock(&dns6_requests_mut); - dns6_requests = NULL; - dns6_requests_pending = 0; - dns6_requests_made = 0; - dns6_requests_finished = 0; - dns6_requests_retries = 0; - 
dns6_replies_found = 0; - dns6_replies_failed = 0; - pthread_mutex_unlock(&dns6_requests_mut); - - pthread_mutex_lock(&dns6_replies_mut); - dns6_replies = NULL; - pthread_mutex_unlock(&dns6_replies_mut); -} - -static void *dns6_thread_resolve(void *ptr); - -static void dns6_signal_threads(void) -{ - pthread_mutex_lock(&dns6_requests_mut); - pthread_cond_signal(&dns6_cond); - pthread_mutex_unlock(&dns6_requests_mut); -} - -static int dns6_request_add(DNSREQ6 *d) -{ - unsigned long pending; - - pthread_mutex_lock(&dns6_requests_mut); - d->next = dns6_requests; - dns6_requests = d; - dns6_requests_pending++; - dns6_requests_made++; - pending = dns6_requests_pending; - pthread_mutex_unlock(&dns6_requests_mut); - - if(pending > (unsigned long)dns6_threads && dns6_threads < dns_threads_max) { - pthread_t thread; - if(pthread_create(&thread, NULL, dns6_thread_resolve, NULL)) { - fprintf(stderr, "%s: Cannot create DNS thread.\n", PROG); - if(dns6_threads == 0) { - pthread_mutex_lock(&dns6_requests_mut); - dns6_requests = d->next; - dns6_requests_pending--; - dns6_requests_made--; - pthread_mutex_unlock(&dns6_requests_mut); - free(d); - return -1; - } - } - else { - dns6_threads++; - pthread_detach(thread); - } - } - - dns6_signal_threads(); - return 0; -} - -static void dns6_request_done(DNSREQ6 *d, int added) -{ - pthread_mutex_lock(&dns6_requests_mut); - dns6_requests_pending--; - dns6_requests_finished++; - if(!added) dns6_replies_failed++; - else dns6_replies_found += added; - pthread_mutex_unlock(&dns6_requests_mut); - free(d); -} - -static void dns6_request_failed(DNSREQ6 *d, int added, int gai_error) -{ - switch(gai_error) { - case EAI_AGAIN: - if(d->tries > 0) { - if(!dns_silent) - fprintf(stderr, "%s: DNS: '%s' will be retried: %s\n", PROG, d->hostname, gai_strerror(gai_error)); - d->tries--; - pthread_mutex_lock(&dns6_requests_mut); - d->next = dns6_requests; - dns6_requests = d; - dns6_requests_retries++; - dns6_replies_found += added; - 
pthread_mutex_unlock(&dns6_requests_mut); - return; - } - /* fall through */ - default: - if(!dns_silent) - fprintf(stderr, "%s: DNS: '%s' failed: %s\n", PROG, d->hostname, gai_strerror(gai_error)); - dns6_request_done(d, added); - return; - } -} - -static DNSREQ6 *dns6_request_get(void) -{ - DNSREQ6 *ret = NULL; - - while(!ret) { - pthread_mutex_lock(&dns6_requests_mut); - if(dns6_requests) { - ret = dns6_requests; - dns6_requests = dns6_requests->next; - ret->next = NULL; - } - pthread_mutex_unlock(&dns6_requests_mut); - if(ret) continue; - - pthread_mutex_lock(&dns6_requests_mut); - while(!dns6_requests) - pthread_cond_wait(&dns6_cond, &dns6_requests_mut); - pthread_mutex_unlock(&dns6_requests_mut); - } - - return ret; -} - -/* - * DNS thread for IPv6 mode: resolves both AAAA and A records. - * A records are normalized to IPv4-mapped IPv6 (::ffff:x.x.x.x). - */ -static void *dns6_thread_resolve(void *ptr) -{ - DNSREQ6 *d; - (void)ptr; - - while((d = dns6_request_get())) { - int added = 0; - int r; - struct addrinfo *result, *rp, hints; - - /* resolve both IPv4 and IPv6 */ - memset(&hints, 0, sizeof(hints)); - hints.ai_family = AF_UNSPEC; - hints.ai_socktype = SOCK_DGRAM; - - r = getaddrinfo(d->hostname, "80", &hints, &result); - if(r != 0) { - dns6_request_failed(d, 0, r); - continue; - } - - for(rp = result; rp != NULL; rp = rp->ai_next) { - DNSREP6 *p; - ipv6_addr_t ip; - - if(rp->ai_family == AF_INET6) { - struct sockaddr_in6 *sa6 = (struct sockaddr_in6 *)rp->ai_addr; - ip = in6_addr_to_ipv6(&sa6->sin6_addr); - } - else if(rp->ai_family == AF_INET) { - struct sockaddr_in *sa4 = (struct sockaddr_in *)rp->ai_addr; - ip = ipv4_to_mapped6(ntohl(sa4->sin_addr.s_addr)); - } - else continue; - - p = malloc(sizeof(DNSREP6)); - if(!p) { - fprintf(stderr, "%s: DNS: out of memory while resolving host '%s'\n", PROG, d->hostname); - continue; - } - - p->ip = ip; - pthread_mutex_lock(&dns6_replies_mut); - p->next = dns6_replies; - dns6_replies = p; - added++; - 
pthread_mutex_unlock(&dns6_replies_mut); - } - - freeaddrinfo(result); - dns6_request_done(d, added); - } - - return NULL; -} - -static void dns6_process_replies(ipset6 *ips) -{ - pthread_mutex_lock(&dns6_replies_mut); - while(dns6_replies) { - DNSREP6 *p; - ipset6_add_ip_range(ips, dns6_replies->ip, dns6_replies->ip); - p = dns6_replies->next; - free(dns6_replies); - dns6_replies = p; - } - pthread_mutex_unlock(&dns6_replies_mut); -} - -static int dns6_request(ipset6 *ips, char *hostname) -{ - DNSREQ6 *d; - - dns6_process_replies(ips); - - d = malloc(sizeof(DNSREQ6) + strlen(hostname) + 1); - if(!d) { - fprintf(stderr, "%s: out of memory, while trying to resolve '%s'\n", PROG, hostname); - return -1; - } - - strcpy(d->hostname, hostname); - d->tries = 20; - - if(dns6_request_add(d)) - return -1; - - return 0; -} - -static int dns6_done(ipset6 *ips) -{ - unsigned long pending, made; - - pthread_mutex_lock(&dns6_requests_mut); - made = dns6_requests_made; - pthread_mutex_unlock(&dns6_requests_mut); - - if(!made) { - dns6_reset_stats(); - return 0; - } - - while(1) { - pthread_mutex_lock(&dns6_requests_mut); - pending = dns6_requests_pending; - pthread_mutex_unlock(&dns6_requests_mut); - - if(!pending) break; - dns6_process_replies(ips); - - if(pending) { - dns6_signal_threads(); - sleep(1); - } - } - dns6_process_replies(ips); - - dns6_reset_stats(); - return 0; -} - -/* +/* ---------------------------------------------------------------------------- * ipset6_load() * * Load a file into an IPv6 ipset. 
diff --git a/src/ipset_dns.c b/src/ipset_dns.c new file mode 100644 index 0000000..34f5ef7 --- /dev/null +++ b/src/ipset_dns.c @@ -0,0 +1,371 @@ +#include "iprange.h" + +#define MAX_INPUT_ELEMENT 255 + +/* ---------------------------------------------------------------------------- + * hostname resolution — IPv4 DNS thread pool + */ + +typedef struct dnsreq { + struct dnsreq *next; + char tries; + char hostname[]; +} DNSREQ; + +typedef struct dnsrep { + in_addr_t ip; + struct dnsrep *next; +} DNSREP; + +static DNSREQ *dns_requests; +static DNSREP *dns_replies; +static int dns_threads; +int dns_threads_max = 5; +int dns_silent; +int dns_progress; +static unsigned long dns_requests_pending; +static unsigned long dns_requests_made; +static unsigned long dns_requests_finished; +static unsigned long dns_requests_retries; +static unsigned long dns_replies_found; +static unsigned long dns_replies_failed; + +static pthread_cond_t dns_cond = PTHREAD_COND_INITIALIZER; +static pthread_mutex_t dns_requests_mut = PTHREAD_MUTEX_INITIALIZER; +static pthread_mutex_t dns_replies_mut = PTHREAD_MUTEX_INITIALIZER; + +static void dns_lock_requests(void) { pthread_mutex_lock(&dns_requests_mut); } +static void dns_unlock_requests(void) { pthread_mutex_unlock(&dns_requests_mut); } +static void dns_lock_replies(void) { pthread_mutex_lock(&dns_replies_mut); } +static void dns_unlock_replies(void) { pthread_mutex_unlock(&dns_replies_mut); } + +void dns_reset_stats(void) +{ + dns_lock_requests(); + dns_requests = NULL; + dns_requests_pending = 0; + dns_requests_made = 0; + dns_requests_finished = 0; + dns_requests_retries = 0; + dns_replies_found = 0; + dns_replies_failed = 0; + dns_unlock_requests(); + + dns_lock_replies(); + dns_replies = NULL; + dns_unlock_replies(); +} + +static void dns_thread_wait_for_requests(void) { + dns_lock_requests(); + while(!dns_requests) + pthread_cond_wait(&dns_cond, &dns_requests_mut); + dns_unlock_requests(); +} + +static void dns_signal_threads(void) +{ + 
dns_lock_requests(); + pthread_cond_signal(&dns_cond); + dns_unlock_requests(); +} + +static void *dns_thread_resolve(void *ptr); + +static int dns_request_add(DNSREQ *d) +{ + unsigned long pending; + + dns_lock_requests(); + d->next = dns_requests; + dns_requests = d; + dns_requests_pending++; + dns_requests_made++; + + pending = dns_requests_pending; + dns_unlock_requests(); + + if(pending > (unsigned long)dns_threads && dns_threads < dns_threads_max) { + pthread_t thread; + + if(unlikely(debug)) + fprintf(stderr, "%s: Creating new DNS thread\n", PROG); + + if(pthread_create(&thread, NULL, dns_thread_resolve, NULL)) { + fprintf(stderr, "%s: Cannot create DNS thread.\n", PROG); + if(dns_threads == 0) { + dns_lock_requests(); + dns_requests = d->next; + dns_requests_pending--; + dns_requests_made--; + dns_unlock_requests(); + + free(d); + return -1; + } + } + else { + dns_threads++; + if(pthread_detach(thread)) + fprintf(stderr, "%s: Cannot detach DNS thread.\n", PROG); + } + } + + dns_signal_threads(); + return 0; +} + +static void dns_request_done(DNSREQ *d, int added) +{ + dns_lock_requests(); + dns_requests_pending--; + dns_requests_finished++; + + if(!added) dns_replies_failed++; + else dns_replies_found += added; + + dns_unlock_requests(); + + free(d); +} + +static void dns_request_failed(DNSREQ *d, int added, int gai_error) +{ + switch(gai_error) { + case EAI_AGAIN: + if(d->tries > 0) { + if(!dns_silent) + fprintf(stderr, "%s: DNS: '%s' will be retried: %s\n", PROG, d->hostname, gai_strerror(gai_error)); + + d->tries--; + + dns_lock_requests(); + d->next = dns_requests; + dns_requests = d; + dns_requests_retries++; + dns_replies_found += added; + dns_unlock_requests(); + return; + } + dns_request_done(d, added); + return; + + case EAI_SYSTEM: + fprintf(stderr, "%s: DNS: '%s' system error: %s\n", PROG, d->hostname, strerror(errno)); + dns_request_done(d, added); + return; + + case EAI_SOCKTYPE: + case EAI_SERVICE: + case EAI_MEMORY: + case EAI_BADFLAGS: + 
fprintf(stderr, "%s: DNS: '%s' error: %s\n", PROG, d->hostname, gai_strerror(gai_error)); + dns_request_done(d, added); + return; + + case EAI_NONAME: + case EAI_FAIL: + case EAI_FAMILY: + default: + if(!dns_silent) + fprintf(stderr, "%s: DNS: '%s' failed permanently: %s\n", PROG, d->hostname, gai_strerror(gai_error)); + dns_request_done(d, added); + return; + } +} + +static DNSREQ *dns_request_get(void) +{ + DNSREQ *ret = NULL; + + while(!ret) { + dns_lock_requests(); + if(dns_requests) { + ret = dns_requests; + dns_requests = dns_requests->next; + ret->next = NULL; + } + dns_unlock_requests(); + if(ret) continue; + + dns_thread_wait_for_requests(); + } + + return ret; +} + +static void *dns_thread_resolve(void *ptr) +{ + DNSREQ *d; + + if(ptr) { ; } + + while((d = dns_request_get())) { + int added = 0; + + int r; + struct addrinfo *result, *rp, hints; + + /* zero the whole struct first: getaddrinfo() requires every unused hints member to be 0/NULL */ + memset(&hints, 0, sizeof(hints)); + hints.ai_family = AF_INET; + hints.ai_socktype = SOCK_DGRAM; + + r = getaddrinfo(d->hostname, "80", &hints, &result); + if(r != 0) { + dns_request_failed(d, 0, r); + continue; + } + + for (rp = result; rp != NULL; rp = rp->ai_next) { + char host[MAX_INPUT_ELEMENT + 1] = ""; + network_addr_t net; + int err = 0; + DNSREP *p; + + r = getnameinfo(rp->ai_addr, rp->ai_addrlen, host, sizeof(host), NULL, 0, NI_NUMERICHOST); + if (r != 0) { + fprintf(stderr, "%s: DNS: '%s' failed to get IP string: %s\n", PROG, d->hostname, gai_strerror(r)); + continue; + } + + net = str2netaddr(host, &err); + if(err) { + fprintf(stderr, "%s: DNS: '%s' cannot parse the IP '%s': %s\n", PROG, d->hostname, host, gai_strerror(r)); + continue; + } + + p = malloc(sizeof(DNSREP)); + if(!p) { + fprintf(stderr, "%s: DNS: out of memory while resolving host '%s'\n", PROG, d->hostname); + continue; + } + + if(unlikely(debug)) { + char buf[IP2STR_MAX_LEN + 1]; + fprintf(stderr, "%s: DNS: '%s' = %s\n", PROG, d->hostname, ip2str_r(buf, net.addr)); + } + + p->ip = net.addr; + dns_lock_replies(); + p->next =
dns_replies; + dns_replies = p; + added++; + dns_unlock_replies(); + } + + freeaddrinfo(result); + dns_request_done(d, added); + } + + return NULL; +} + +static void dns_process_replies(ipset *ips) +{ + dns_lock_replies(); + + if(!dns_replies) { + dns_unlock_replies(); + return; + } + + while(dns_replies) { + DNSREP *p; + ipset_add_ip_range(ips, dns_replies->ip, dns_replies->ip); + p = dns_replies->next; + free(dns_replies); + dns_replies = p; + } + dns_unlock_replies(); +} + +int dns_request(ipset *ips, char *hostname) +{ + DNSREQ *d; + + dns_process_replies(ips); + + d = malloc(sizeof(DNSREQ) + strlen(hostname) + 1); + if(!d) goto cleanup; + + strcpy(d->hostname, hostname); + d->tries = 20; + + if(dns_request_add(d)) + return -1; + + return 0; + + cleanup: + fprintf(stderr, "%s: out of memory, while trying to resolv '%s'\n", PROG, hostname); + return -1; +} + +int dns_done(ipset *ips) +{ + unsigned long dots = 40, shown = 0, should_show = 0; + unsigned long pending, made, finished, retries, replies_found, replies_failed; + + if(ips) { ; } + + dns_lock_requests(); + made = dns_requests_made; + dns_unlock_requests(); + + if(!made) { + dns_reset_stats(); + return 0; + } + + while(1) { + dns_lock_requests(); + pending = dns_requests_pending; + made = dns_requests_made; + finished = dns_requests_finished; + retries = dns_requests_retries; + replies_found = dns_replies_found; + replies_failed = dns_replies_failed; + dns_unlock_requests(); + + if(!pending) break; + + if(unlikely(debug)) + fprintf(stderr, "%s: DNS: waiting %lu DNS resolutions to finish...\n", PROG, pending); + else if(dns_progress) { + should_show = dots * finished / made; + for(; shown < should_show; shown++) { + if(!(shown % 10)) fprintf(stderr, "%lu%%", shown * 100 / dots); + else fprintf(stderr, "."); + } + } + + dns_process_replies(ips); + + if(pending) { + dns_signal_threads(); + sleep(1); + } + } + dns_process_replies(ips); + + dns_lock_requests(); + made = dns_requests_made; + retries = 
dns_requests_retries; + replies_found = dns_replies_found; + replies_failed = dns_replies_failed; + dns_unlock_requests(); + + if(unlikely(debug)) + fprintf(stderr, "%s: DNS: made %lu DNS requests, failed %lu, retries: %lu, IPs got %lu, threads used %d of %d\n", PROG, made, replies_failed, retries, replies_found, dns_threads, dns_threads_max); + else if(dns_progress) { + for(; shown <= dots; shown++) { + if(!(shown % 10)) fprintf(stderr, "%lu%%", shown * 100 / dots); + else fprintf(stderr, "."); + } + fprintf(stderr, "\n"); + } + + dns_reset_stats(); + return (replies_failed != 0); +} diff --git a/src/ipset_dns.h b/src/ipset_dns.h new file mode 100644 index 0000000..86d5f14 --- /dev/null +++ b/src/ipset_dns.h @@ -0,0 +1,12 @@ +#ifndef IPRANGE_IPSET_DNS_H +#define IPRANGE_IPSET_DNS_H + +extern int dns_threads_max; +extern int dns_silent; +extern int dns_progress; + +extern int dns_request(ipset *ips, char *hostname); +extern int dns_done(ipset *ips); +extern void dns_reset_stats(void); + +#endif //IPRANGE_IPSET_DNS_H diff --git a/src/ipset_load.c b/src/ipset_load.c index e2f780c..5adb877 100644 --- a/src/ipset_load.c +++ b/src/ipset_load.c @@ -219,493 +219,6 @@ static inline IPSET_LINE_TYPE parse_line(char *line, int lineid, char *ipstr, ch return LINE_IS_INVALID; } -/* ---------------------------------------------------------------------------- - * hostname resolution - */ - -typedef struct dnsreq { - struct dnsreq *next; - char tries; - char hostname[]; -} DNSREQ; - -typedef struct dnsrep { - in_addr_t ip; - struct dnsrep *next; -} DNSREP; - -static DNSREQ *dns_requests; -static DNSREP *dns_replies; -static int dns_threads; -int dns_threads_max = 5; -int dns_silent; -int dns_progress; -static unsigned long dns_requests_pending; -static unsigned long dns_requests_made; -static unsigned long dns_requests_finished; -static unsigned long dns_requests_retries; -static unsigned long dns_replies_found; -static unsigned long dns_replies_failed; - -static pthread_cond_t 
dns_cond = PTHREAD_COND_INITIALIZER; -static pthread_mutex_t dns_requests_mut = PTHREAD_MUTEX_INITIALIZER; -static pthread_mutex_t dns_replies_mut = PTHREAD_MUTEX_INITIALIZER; - -void dns_lock_requests(void) { pthread_mutex_lock(&dns_requests_mut); } -void dns_unlock_requests(void) { pthread_mutex_unlock(&dns_requests_mut); } -void dns_lock_replies(void) { pthread_mutex_lock(&dns_replies_mut); } -void dns_unlock_replies(void) { pthread_mutex_unlock(&dns_replies_mut); } - -static void dns_reset_stats(void) -{ - dns_lock_requests(); - dns_requests = NULL; - dns_requests_pending = 0; - dns_requests_made = 0; - dns_requests_finished = 0; - dns_requests_retries = 0; - dns_replies_found = 0; - dns_replies_failed = 0; - dns_unlock_requests(); - - dns_lock_replies(); - dns_replies = NULL; - dns_unlock_replies(); -} - -// the threads waiting for requests -void dns_thread_wait_for_requests(void) { - dns_lock_requests(); - while(!dns_requests) - pthread_cond_wait(&dns_cond, &dns_requests_mut); - dns_unlock_requests(); -} - -// the master signals the threads for new requests -static void dns_signal_threads(void) -{ - /* signal the childs we have a new request for them */ - dns_lock_requests(); - pthread_cond_signal(&dns_cond); - dns_unlock_requests(); -} - - -static void *dns_thread_resolve(void *ptr); - -/* ---------------------------------------------------------------------------- - * dns_request_add() - * - * add a new DNS resolution request to the queue - * - */ - -static int dns_request_add(DNSREQ *d) -{ - unsigned long pending; - - dns_lock_requests(); - d->next = dns_requests; - dns_requests = d; - dns_requests_pending++; - dns_requests_made++; - - pending = dns_requests_pending; - dns_unlock_requests(); - - /* do we have start a new thread? 
*/ - if(pending > (unsigned long)dns_threads && dns_threads < dns_threads_max) { - pthread_t thread; - - if(unlikely(debug)) - fprintf(stderr, "%s: Creating new DNS thread\n", PROG); - - if(pthread_create(&thread, NULL, dns_thread_resolve, NULL)) { - fprintf(stderr, "%s: Cannot create DNS thread.\n", PROG); - if(dns_threads == 0) { - dns_lock_requests(); - dns_requests = d->next; - dns_requests_pending--; - dns_requests_made--; - dns_unlock_requests(); - - free(d); - return -1; - } - } - else { - dns_threads++; - if(pthread_detach(thread)) - fprintf(stderr, "%s: Cannot detach DNS thread.\n", PROG); - } - } - - dns_signal_threads(); - return 0; -} - - -/* ---------------------------------------------------------------------------- - * dns_request_done() - * - * to be called by a worker thread - * let the main thread know a DNS resolution has been completed - * - */ - -static void dns_request_done(DNSREQ *d, int added) -{ - dns_lock_requests(); - dns_requests_pending--; - dns_requests_finished++; - - if(!added) dns_replies_failed++; - else dns_replies_found += added; - - dns_unlock_requests(); - - free(d); -} - - -/* ---------------------------------------------------------------------------- - * dns_request_failed() - * - * to be called by a worker thread - * handle a DNS failure (mainly for retries) - * - */ - -static void dns_request_failed(DNSREQ *d, int added, int gai_error) -{ - switch(gai_error) { - case EAI_AGAIN: /* The name server returned a temporary failure indication. Try again later. 
*/ - if(d->tries > 0) { - if(!dns_silent) - fprintf(stderr, "%s: DNS: '%s' will be retried: %s\n", PROG, d->hostname, gai_strerror(gai_error)); - - d->tries--; - - dns_lock_requests(); - d->next = dns_requests; - dns_requests = d; - dns_requests_retries++; - dns_replies_found += added; - dns_unlock_requests(); - return; - } - dns_request_done(d, added); - return; - - case EAI_SYSTEM: - fprintf(stderr, "%s: DNS: '%s' system error: %s\n", PROG, d->hostname, strerror(errno)); - dns_request_done(d, added); - return; - - case EAI_SOCKTYPE: /* The requested socket type is not supported. */ - case EAI_SERVICE: /* The requested service is not available for the requested socket type. */ - case EAI_MEMORY: /* Out of memory. */ - case EAI_BADFLAGS: /* hints.ai_flags contains invalid flags; or, hints.ai_flags included AI_CANONNAME and name was NULL. */ - fprintf(stderr, "%s: DNS: '%s' error: %s\n", PROG, d->hostname, gai_strerror(gai_error)); - dns_request_done(d, added); - return; - - case EAI_NONAME: /* The node or service is not known */ - case EAI_FAIL: /* The name server returned a permanent failure indication. */ - case EAI_FAMILY: /* The requested address family is not supported. 
*/ - default: - if(!dns_silent) - fprintf(stderr, "%s: DNS: '%s' failed permanently: %s\n", PROG, d->hostname, gai_strerror(gai_error)); - dns_request_done(d, added); - return; - } -} - - -/* ---------------------------------------------------------------------------- - * dns_request_get() - * - * to be called by a worker thread - * get a request from the requests queue - * - */ - -static DNSREQ *dns_request_get(void) -{ - DNSREQ *ret = NULL; - - /* - * if(unlikely(debug)) - * fprintf(stderr, "%s: DNS THREAD waiting for DNS REQUEST\n", PROG); - */ - - while(!ret) { - dns_lock_requests(); - if(dns_requests) { - ret = dns_requests; - dns_requests = dns_requests->next; - ret->next = NULL; - } - dns_unlock_requests(); - if(ret) continue; - - dns_thread_wait_for_requests(); - } - - return ret; -} - - -/* ---------------------------------------------------------------------------- - * dns_thread_resolve() - * - * a pthread worker to get requests and generate replies - * - */ - -static void *dns_thread_resolve(void *ptr) -{ - DNSREQ *d; - - if(ptr) { ; } - - /* - * if(unlikely(debug)) - * fprintf(stderr, "%s: DNS THREAD created\n", PROG); - */ - - while((d = dns_request_get())) { - int added = 0; - - /* - * if(unlikely(debug)) - * fprintf(stderr, "%s: DNS THREAD resolving DNS REQUEST for '%s'\n", PROG, d->hostname); - */ - - int r; - struct addrinfo *result, *rp, hints; - - hints.ai_family = AF_INET; - hints.ai_socktype = SOCK_DGRAM; - hints.ai_flags = 0; - hints.ai_protocol = 0; - - r = getaddrinfo(d->hostname, "80", &hints, &result); - if(r != 0) { - dns_request_failed(d, 0, r); - continue; - } - - for (rp = result; rp != NULL; rp = rp->ai_next) { - char host[MAX_INPUT_ELEMENT + 1] = ""; - network_addr_t net; - int err = 0; - DNSREP *p; - - r = getnameinfo(rp->ai_addr, rp->ai_addrlen, host, sizeof(host), NULL, 0, NI_NUMERICHOST); - if (r != 0) { - fprintf(stderr, "%s: DNS: '%s' failed to get IP string: %s\n", PROG, d->hostname, gai_strerror(r)); - continue; - } - - net 
= str2netaddr(host, &err); - if(err) { - fprintf(stderr, "%s: DNS: '%s' cannot parse the IP '%s': %s\n", PROG, d->hostname, host, gai_strerror(r)); - continue; - } - - p = malloc(sizeof(DNSREP)); - if(!p) { - fprintf(stderr, "%s: DNS: out of memory while resolving host '%s'\n", PROG, d->hostname); - continue; - } - - if(unlikely(debug)) { - char buf[IP2STR_MAX_LEN + 1]; - fprintf(stderr, "%s: DNS: '%s' = %s\n", PROG, d->hostname, ip2str_r(buf, net.addr)); - } - - p->ip = net.addr; - dns_lock_replies(); - p->next = dns_replies; - dns_replies = p; - added++; - dns_unlock_replies(); - } - - freeaddrinfo(result); - dns_request_done(d, added); - } - - return NULL; -} - -/* ---------------------------------------------------------------------------- - * dns_process_replies() - * - * dequeue the resolved hostnames by adding them to the ipset - * - */ - -static void dns_process_replies(ipset *ips) -{ - dns_lock_replies(); - - if(!dns_replies) { - dns_unlock_replies(); - return; - } - - while(dns_replies) { - DNSREP *p; - - /* - * if(unlikely(debug)) - * char buf[IP2STR_MAX_LEN + 1]; - * fprintf(stderr, "%s: Got DNS REPLY '%s'\n", PROG, ip2str_r(buf, dns_replies->ip)); - */ - - ipset_add_ip_range(ips, dns_replies->ip, dns_replies->ip); - - p = dns_replies->next; - free(dns_replies); - dns_replies = p; - } - dns_unlock_replies(); -} - - -/* ---------------------------------------------------------------------------- - * dns_request() - * - * attempt to resolv a hostname - * the result (one or more) will be appended to the ipset supplied - * - * this is asynchronous - it will just queue the request and spawn worker - * threads to do the DNS resolution. - * - * the IPs will be added to the ipset, either at the next call to this - * function, or by calling dns_done(). - * - * So, to use it: - * 1. call dns_request() to request dns resolutions (any number) - * 2. call dns_done() when you finish requesting hostnames - * 3. 
the resolved IPs are in the ipset you supplied - * - * All ipset manipulation is done at this thread, so if control is - * outside the above 2 functions, you are free to do whatever you like - * with the ipset. - * - * Important: you cannot use dns_request() and dns_done() with more - * than 1 ipset at the same time. The resulting IPs will be multiplexed. - * When you call dns_done() on one ipset, you can proceed with the next. - * - */ - -static int dns_request(ipset *ips, char *hostname) -{ - DNSREQ *d; - - /* dequeue if possible */ - dns_process_replies(ips); - - /* - * if(unlikely(debug)) - * fprintf(stderr, "%s: Adding DNS REQUEST for '%s'\n", PROG, hostname); - */ - - d = malloc(sizeof(DNSREQ) + strlen(hostname) + 1); - if(!d) goto cleanup; - - strcpy(d->hostname, hostname); - d->tries = 20; - - /* add the request to the queue */ - if(dns_request_add(d)) - return -1; - - return 0; - - cleanup: - fprintf(stderr, "%s: out of memory, while trying to resolv '%s'\n", PROG, hostname); - return -1; -} - - -/* ---------------------------------------------------------------------------- - * dns_done() - * - * wait for the DNS requests made to finish. 
- * - */ - -static int dns_done(ipset *ips) -{ - unsigned long dots = 40, shown = 0, should_show = 0; - unsigned long pending, made, finished, retries, replies_found, replies_failed; - - if(ips) { ; } - - dns_lock_requests(); - made = dns_requests_made; - dns_unlock_requests(); - - if(!made) { - dns_reset_stats(); - return 0; - } - - while(1) { - dns_lock_requests(); - pending = dns_requests_pending; - made = dns_requests_made; - finished = dns_requests_finished; - retries = dns_requests_retries; - replies_found = dns_replies_found; - replies_failed = dns_replies_failed; - dns_unlock_requests(); - - if(!pending) break; - - if(unlikely(debug)) - fprintf(stderr, "%s: DNS: waiting %lu DNS resolutions to finish...\n", PROG, pending); - else if(dns_progress) { - should_show = dots * finished / made; - for(; shown < should_show; shown++) { - if(!(shown % 10)) fprintf(stderr, "%lu%%", shown * 100 / dots); - else fprintf(stderr, "."); - } - } - - dns_process_replies(ips); - - if(pending) { - dns_signal_threads(); - sleep(1); - } - } - dns_process_replies(ips); - - dns_lock_requests(); - made = dns_requests_made; - retries = dns_requests_retries; - replies_found = dns_replies_found; - replies_failed = dns_replies_failed; - dns_unlock_requests(); - - if(unlikely(debug)) - fprintf(stderr, "%s: DNS: made %lu DNS requests, failed %lu, retries: %lu, IPs got %lu, threads used %d of %d\n", PROG, made, replies_failed, retries, replies_found, dns_threads, dns_threads_max); - else if(dns_progress) { - for(; shown <= dots; shown++) { - if(!(shown % 10)) fprintf(stderr, "%lu%%", shown * 100 / dots); - else fprintf(stderr, "."); - } - fprintf(stderr, "\n"); - } - - dns_reset_stats(); - return (replies_failed != 0); -} /* ---------------------------------------------------------------------------- * ipset_load() @@ -852,7 +365,6 @@ ipset *ipset_load(const char *filename) { if(unlikely(debug)) fprintf(stderr, "%s: DNS resolution for hostname '%s' from line %d of file %s.\n", PROG, ipstr, 
lineid, ips->filename); - /* resolve_hostname(ips, ipstr); */ if(unlikely(dns_request(ips, ipstr))) { if(likely(fp != stdin)) fclose(fp); dns_reset_stats(); @@ -888,12 +400,5 @@ ipset *ipset_load(const char *filename) { if(unlikely(debug)) fprintf(stderr, "%s: Loaded %s %s\n", PROG, (ips->flags & IPSET_FLAG_OPTIMIZED)?"optimized":"non-optimized", ips->filename); - /* - * if(unlikely(!ips->entries)) { - * free(ips); - * return NULL; - * } - */ - return ips; } diff --git a/src/ipset_load.h b/src/ipset_load.h index 669be2e..b922eb9 100644 --- a/src/ipset_load.h +++ b/src/ipset_load.h @@ -1,10 +1,6 @@ #ifndef IPRANGE_IPSET_LOAD_H #define IPRANGE_IPSET_LOAD_H -extern int dns_threads_max; -extern int dns_silent; -extern int dns_progress; - extern ipset *ipset_load(const char *filename); #endif //IPRANGE_IPSET_LOAD_H diff --git a/src/ipset_optimize.c b/src/ipset_optimize.c index f608128..8278a03 100644 --- a/src/ipset_optimize.c +++ b/src/ipset_optimize.c @@ -82,7 +82,7 @@ inline void ipset_optimize(ipset *ips) { * overlaps or is adjustent to the last * then merge it = extent the broadcast of the last */ - if (oaddrs[i].addr <= hi + 1) { + if (oaddrs[i].addr <= hi || (hi != UINT32_MAX && oaddrs[i].addr == hi + 1)) { hi = oaddrs[i].broadcast; continue; } From acc888234ac6c6acff09fa0627910dfa88498a96 Mon Sep 17 00:00:00 2001 From: Costa Tsaousis Date: Sat, 4 Apr 2026 19:21:57 +0300 Subject: [PATCH 5/9] Rewrite README as comprehensive reference documentation Complete rewrite covering all input formats, operations, output modes, address family behavior, DNS resolution, prefix control, and feature detection flags. Organized with tables for quick scanning. Includes practical examples for firewall optimization, blocklist management, and IPv6 workflows. 
--- README.md | 444 +++++++++++++++++++++++++++++++++++++----------------- 1 file changed, 309 insertions(+), 135 deletions(-) diff --git a/README.md b/README.md index 6addb61..5bf5535 100644 --- a/README.md +++ b/README.md @@ -1,143 +1,334 @@ # iprange -`iprange` is a fast command-line tool for reading, normalizing, comparing, and exporting IPv4 and IPv6 address sets. - -It understands single IPs, CIDRs, netmasks, numeric IPs, ranges, and hostnames. You can use it to merge blocklists, compute intersections or exclusions, generate data for `ipset restore`, or compare multiple IP sets as CSV. - -## What it can read - -`iprange` accepts one entry per line and can mix formats in the same input: - -- single IPs - - `1.2.3.4` -- CIDRs - - `1.2.3.0/24` -- dotted netmasks - - `1.2.3.0/255.255.255.0` -- abbreviated IPs - - `10.1` - - `10.1.1` -- IP ranges - - `1.2.3.0 - 1.2.3.255` -- ranges where both sides use CIDR or netmask notation -- numeric IPs -- hostnames - -In IPv6 mode (`-6`), it additionally accepts: - -- IPv6 addresses - - `2001:db8::1` -- IPv6 CIDRs - - `2001:db8::/32` -- IPv6 ranges - - `2001:db8::1 - 2001:db8::ff` -- compressed and full notation - - `::1`, `2001:0db8:0000:0000:0000:0000:0000:0001` -- IPv4-mapped IPv6 - - `::ffff:10.0.0.1` -- plain IPv4 (normalized to `::ffff:x.x.x.x` in IPv6 mode) - -Important input behavior: - -- Hostnames are resolved in parallel. -- Comments after `#` or `;` are ignored. -- In IPv4 mode (default), parsing uses `inet_aton()`, so octal and hex forms are accepted too. -- In IPv6 mode, parsing uses `inet_pton(AF_INET6)`. -- Inputs can come from `stdin`, files, file lists, or directory expansion. 
- -## Main modes - -- `union` / `merge` / `optimize` - - merge all inputs and print the normalized result -- `common` - - print the intersection of all inputs -- `exclude-next` - - merge the inputs before the option, then remove anything matched by the inputs after it -- `ipset-reduce` - - trade a controlled increase in entries for fewer prefixes -- `compare` - - compare all inputs against all other inputs and print CSV -- `compare-first` - - compare the first input against every other input -- `compare-next` - - compare one group of inputs against the next group -- `count-unique` - - merge all inputs and print CSV counts -- `count-unique-all` - - print one CSV count line per input - -## Quick examples - -Merge and normalize: +`iprange` is a fast command-line tool for managing IPv4 and IPv6 address sets. It reads IP addresses, CIDRs, ranges, and hostnames in any combination, normalizes them into optimal non-overlapping sets, and performs set operations (union, intersection, difference, complement). It can also compare sets as CSV, reduce prefix diversity for firewall performance, and produce binary output for fast round-trips. + +For 1 million input lines, a merge completes in under a second. + +## Input formats + +`iprange` accepts one entry per line. All formats can be mixed in the same file. 
+ +### IPv4 (default mode) + +| Format | Example | Notes | +|--------|---------|-------| +| Single IP | `1.2.3.4` | | +| CIDR | `1.2.3.0/24` | Network address applied by default | +| Netmask | `1.2.3.0/255.255.255.0` | Equivalent to /24 | +| Range | `1.2.3.0 - 1.2.3.255` | Dash with optional spaces | +| CIDR range | `1.2.3.0/24 - 1.2.4.0/24` | Network of first to broadcast of second | +| Abbreviated | `10.1` | Expands via `inet_aton()` | +| Numeric | `16909060` | Integer, parsed by `inet_aton()` | +| Octal | `012.0.0.1` | Components starting with 0 are octal | +| Hex | `0x0A000001` | Components starting with 0x are hex | +| Hostname | `example.com` | Resolved via parallel DNS | + +Parsing uses `inet_aton()`, so all numeric forms it accepts (decimal integers, octal, hex, mixed) are supported. This is intentional and documented behavior. + +### IPv6 (`-6` mode) + +| Format | Example | Notes | +|--------|---------|-------| +| Full notation | `2001:0db8:0000:0000:0000:0000:0000:0001` | | +| Compressed | `2001:db8::1` | Standard `::` compression | +| Loopback | `::1` | | +| CIDR | `2001:db8::/32` | Prefix 0-128 | +| Range | `2001:db8::1 - 2001:db8::ff` | | +| IPv4-mapped | `::ffff:10.0.0.1` | | +| Plain IPv4 | `10.0.0.1` | Normalized to `::ffff:10.0.0.1` | +| Hostname | `example.com` | Both AAAA and A records resolved | + +Parsing uses `inet_pton(AF_INET6)`. + +### Comments and whitespace + +- Lines starting with `#` or `;` are comments. +- Inline comments after `#` or `;` are stripped from data lines. +- Empty lines and leading/trailing whitespace are ignored. + +### File inputs + +| Syntax | Meaning | +|--------|---------| +| `file.txt` | Load a single file as one ipset | +| `-` | Read from stdin | +| `@filelist.txt` | Load a file list (one filename per line, comments allowed) | +| `@directory/` | Load all regular files in directory (sorted, no recursion) | +| `file.txt as name` | Override the name shown in CSV output | + +When no files are given, stdin is assumed. 
+ +Feature detection for scripts: +```bash +iprange --has-filelist-loading && echo "supports @filename" +iprange --has-directory-loading && echo "supports @directory" +``` + +### Binary input + +Binary files (produced by `--print-binary`) are auto-detected by their header. IPv4 binary uses format v1.0; IPv6 uses v2.0. Loading a binary file of the wrong family is an error. + +## Address family + +| Flag | Meaning | +|------|---------| +| *(none)* | IPv4 mode (default for text input) | +| `--ipv4` / `-4` | Explicit IPv4 mode | +| `--ipv6` / `-6` | IPv6 mode | + +Rules: + +- Without `-4` or `-6`, text input defaults to IPv4 mode. +- In IPv6 mode, plain IPv4 input is accepted and normalized to `::ffff:x.x.x.x`. +- In IPv4 mode, `::ffff:x.x.x.x` is converted back to IPv4. All other IPv6 input is dropped with one summary warning. +- Operations between IPv4 and IPv6 datasets are not supported. +- Mixed-family range endpoints (e.g., `10.0.0.1 - 2001:db8::1`) are invalid. +- Binary files declare their family in the header. +- Feature detection: `iprange --has-ipv6` exits 0 if IPv6 is supported. + +## Operations + +### Merge / Union (default) + +Merge all inputs into one sorted, deduplicated set. ```bash iprange blocklist-a.txt blocklist-b.txt ``` -Find common IPs: +Aliases: `--optimize`, `--combine`, `--merge`, `--union`, `--union-all`, `-J` + +### Intersection + +Print only the IPs common to all inputs. ```bash iprange --common blocklist-a.txt blocklist-b.txt ``` -Exclude one set from another: +Aliases: `--common`, `--intersect`, `--intersect-all` + +### Complement (exclude) + +Merge all files before `--except`, then remove all IPs matched by the files after it. + +```bash +iprange allow.txt --except deny.txt +``` + +Aliases: `--except`, `--exclude-next`, `--complement`, `--complement-next` + +### Symmetric difference + +Print IPs that exist in either A or B, but not both. Exits 1 if there are differences, 0 if the sets are equal. 
+ +```bash +iprange before.txt --diff after.txt +echo $? # 0 = identical, 1 = different +``` + +Use `--quiet` to suppress the output and only check the exit code. + +Aliases: `--diff`, `--diff-next` + +### Reduce prefixes + +Merge all inputs, then reduce the number of distinct CIDR prefixes while allowing a controlled increase in entry count. This optimizes netfilter/iptables `hash:net` ipsets, where each distinct prefix adds a lookup but entry count does not affect performance. + +```bash +iprange --ipset-reduce 20 blocklist.txt +``` + +Parameters: + +| Option | Default | Meaning | +|--------|---------|---------| +| `--ipset-reduce PERCENT` | 20 | Allow this % increase in entries | +| `--ipset-reduce-entries ENTRIES` | 16384 | Minimum acceptable entry count | + +The tool computes the maximum acceptable entries as `max(current * (1 + PERCENT/100), ENTRIES)`, then iteratively eliminates the prefix with the smallest cost until the limit is reached. The result matches exactly the same set of IPs. + +Use `-v` to see the elimination steps and prefix breakdown. + +Aliases: `--reduce-factor`, `--reduce-entries` + +### Compare (CSV) + +Compare all files pairwise and print CSV with entry counts, unique IPs, combined IPs, and common IPs. 
+ +```bash +iprange --compare --header blocklist-a.txt blocklist-b.txt blocklist-c.txt +``` + +**Compare first**: compare the first file against every other: +```bash +iprange --compare-first --header reference.txt other1.txt other2.txt +``` +**Compare next**: compare files before the option against files after: ```bash -iprange allow.txt --exclude-next deny.txt +iprange --header before1.txt before2.txt --compare-next after1.txt after2.txt ``` -Count unique entries: +### Count unique (CSV) + +Merge all inputs and print a single CSV line with entry and unique IP counts: +```bash +iprange --count-unique --header blocklist.txt +``` +Print one CSV line per input file: ```bash -iprange -C blocklist-a.txt blocklist-b.txt iprange --count-unique-all --header blocklist-a.txt blocklist-b.txt ``` -Generate single-IP output: +## Output formats + +### CIDR (default) + +Outputs optimal non-overlapping CIDR blocks: +``` +10.0.0.0/24 +10.0.1.0/25 +10.0.1.128/26 +``` + +### Ranges (`--print-ranges` / `-j`) + +``` +10.0.0.0-10.0.0.255 +10.0.1.0-10.0.1.191 +``` + +### Single IPs (`--print-single-ips` / `-1`) + +Enumerates every individual IP. Ranges larger than 16,777,216 IPs (256^3) are skipped with a warning to prevent unbounded output. +### Binary (`--print-binary`) + +Fast machine-readable format for the same architecture (no endianness conversion). 
Use for caching and fast round-trips: ```bash -iprange -1 hosts.txt +iprange --print-binary blocklist.txt > cache.bin +iprange cache.bin # reads binary, outputs CIDR ``` -Generate binary output for fast round-trips on the same architecture: +### Prefix and suffix strings + +Customize output for ipset restore, iptables rules, or other tools: ```bash -iprange --print-binary blocklist.txt > blocklist.bin -iprange blocklist.bin +# Generate ipset restore commands +iprange --print-prefix "add myset " blocklist.txt + +# Different prefixes for single IPs vs networks +iprange --print-prefix-ips "add ips " --print-prefix-nets "add nets " blocklist.txt + +# Add suffixes +iprange --print-suffix " timeout 3600" blocklist.txt ``` -Generate `ipset restore`-style lines: +### Prefix control + +Limit which CIDR prefixes appear in output: ```bash -iprange --print-prefix 'add myset ' --print-suffix '' blocklist.txt +# Only generate /24 to /32 (no large blocks) +iprange --min-prefix 24 blocklist.txt + +# Only use specific prefixes +iprange --prefixes 24,32 blocklist.txt ``` -## Address family +Warning: restricting prefixes can dramatically increase the number of output entries. + +## DNS resolution + +Hostnames in input files are resolved in parallel using a thread pool. + +| Option | Default | Meaning | +|--------|---------|---------| +| `--dns-threads N` | 5 | Number of parallel DNS queries | +| `--dns-silent` | off | Suppress DNS error messages | +| `--dns-progress` | off | Show resolution progress bar | + +In IPv4 mode, only A records are resolved. In IPv6 mode, both AAAA and A records are resolved (A records normalized to `::ffff:x.x.x.x`). + +Temporary failures (EAI_AGAIN) are retried up to 20 times. Permanent failures are reported to stderr (unless `--dns-silent`). 
+ +## Input behavior + +| Option | Default | Meaning | +|--------|---------|---------| +| `--dont-fix-network` | off | Disable network address normalization (`1.1.1.17/24` read as `1.1.1.17-1.1.1.255` instead of `1.1.1.0/24`) | +| `--default-prefix N` / `-p N` | 32 | Default prefix for bare IPs without a mask | + +## Feature detection -By default, `iprange` operates in IPv4 mode. Use `-6` / `--ipv6` for IPv6: +For scripts that need to check which features are available: ```bash -# IPv6 merge -iprange -6 blocklist-v6.txt +iprange --has-compare && echo "compare modes available" +iprange --has-reduce && echo "reduce mode available" +iprange --has-filelist-loading && echo "@filename supported" +iprange --has-directory-loading && echo "@directory supported" +iprange --has-ipv6 && echo "IPv6 supported" +``` + +Each flag exits 0 if the feature is present, 1 otherwise. + +## Examples + +### Firewall optimization -# IPv6 count -iprange -6 -C blocklist-v6.txt +Reduce a country blocklist for optimal ipset performance: -# IPv4 input normalized to mapped IPv6 -echo "10.0.0.1" | iprange -6 -# output: ::ffff:10.0.0.1 +```bash +# Before: 406 entries using 18 prefixes (18 lookups per packet) +iprange -v country_gr.netset >/dev/null -# Explicit IPv4 mode (same as default) -iprange -4 blocklist.txt +# After: 4326 entries using 3 prefixes (3 lookups per packet) +# Same 6.3 million unique IPs matched +iprange -v --ipset-reduce 20 country_gr.netset >/dev/null ``` -Key rules: -- Without `-4` or `-6`, text input defaults to IPv4 mode. -- In IPv6 mode, plain IPv4 input is accepted and normalized to `::ffff:x.x.x.x`. -- Operations between IPv4 and IPv6 datasets are not supported. -- Mixed-family range endpoints (e.g., `10.0.0.1 - 2001:db8::1`) are invalid. -- Binary files declare their family in the header. -- Feature detection: `iprange --has-ipv6` exits with 0 if IPv6 is supported. 
+The reduction is lossless: piping the reduced output back through `iprange` reproduces the original set exactly. + +### Blocklist management + +```bash +# Merge multiple blocklists into one optimized set +iprange list1.txt list2.txt list3.txt > merged.txt + +# Find IPs that appear in all blocklists +iprange --common list1.txt list2.txt list3.txt > common.txt + +# Create a blocklist but exclude your own networks +iprange merged.txt --except my-networks.txt > final.txt + +# Check if two versions of a blocklist differ +iprange old.txt --diff new.txt --quiet +echo $? # 0 = no changes, 1 = changed + +# Compare overlap between blocklists +iprange --compare --header list1.txt list2.txt list3.txt +``` + +### IPv6 workflows + +```bash +# Merge IPv6 blocklists +iprange -6 v6-list1.txt v6-list2.txt + +# Mix IPv4 and IPv6 in one file (IPv6 mode normalizes IPv4) +iprange -6 mixed-input.txt + +# Count unique IPv6 addresses +iprange -6 -C v6-list.txt + +# Binary cache for IPv6 +iprange -6 --print-binary large-v6.txt > cache-v6.bin +iprange -6 cache-v6.bin +``` ## Build and install @@ -158,50 +349,33 @@ make make install ``` -If you do not want to build the man page: - -```bash -./configure --disable-man -``` +To skip the man page: `./configure --disable-man` ## Testing -Project test entry points: - -- `./run-tests.sh` - - CLI regression suite -- `./run-build-tests.sh` - - build and layout regressions -- `./run-sanitizer-tests.sh` - - ASAN/UBSAN/TSAN coverage -- `make check` - - normal build-integrated test path -- `make check-sanitizers` - - sanitizer-integrated test path +| Command | What it tests | +|---------|---------------| +| `make check` | Full test suite (CLI + build) | +| `./run-tests.sh` | CLI regression tests | +| `./run-build-tests.sh` | Build and layout regressions | +| `./run-sanitizer-tests.sh` | ASAN/UBSAN/TSAN coverage | +| `make check-sanitizers` | Sanitizer-integrated path | ## Repository layout -- `src/` - - C sources and headers -- `packaging/` - - packaging 
helpers, spec template, ebuild, and release tooling -- `tests.d/` - - CLI regression tests -- `tests.build.d/` - - build and layout regressions -- `tests.sanitizers.d/` - - sanitizer CLI regressions -- `tests.tsan.d/` - - TSAN regressions -- `tests.unit/` - - unit-style harnesses for internal edge cases +| Directory | Contents | +|-----------|----------| +| `src/` | C sources and headers | +| `packaging/` | Spec template, ebuild, release tooling | +| `tests.d/` | CLI regression tests | +| `tests.build.d/` | Build and layout regressions | +| `tests.sanitizers.d/` | Sanitizer CLI regressions | +| `tests.tsan.d/` | TSAN regressions | +| `tests.unit/` | Unit-style internal harnesses | ## Getting help -For the full option list: - ```bash -iprange --help +iprange --help # full option reference +iprange --version # version and copyright ``` - -The project wiki content that originally documented the feature set is now folded into this README so the repository landing page explains the tool directly. From 1d8b50264043bbff71fc23980cb38e32b6941ef0 Mon Sep 17 00:00:00 2001 From: Costa Tsaousis Date: Sat, 4 Apr 2026 19:28:48 +0300 Subject: [PATCH 6/9] Add comprehensive documentation in docs/ MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move documentation from the wiki into the repository so it is versioned with the code. The wiki now redirects here. 
- docs/input-formats.md — every accepted format, file lists, directories, binary - docs/output-formats.md — CIDR, ranges, single IPs, binary, CSV, prefix/suffix - docs/operations.md — merge, intersect, exclude, diff, reduce, compare, count - docs/ipv6.md — address family, normalization, cross-family rules - docs/dns-resolution.md — threading, retry, configuration - docs/ipset-reduce.md — prefix reduction tutorial with examples - README.md updated with documentation section linking to docs/ --- README.md | 12 ++++ docs/dns-resolution.md | 55 ++++++++++++++++ docs/input-formats.md | 133 ++++++++++++++++++++++++++++++++++++++ docs/ipset-reduce.md | 129 +++++++++++++++++++++++++++++++++++++ docs/ipv6.md | 100 +++++++++++++++++++++++++++++ docs/operations.md | 139 ++++++++++++++++++++++++++++++++++++++++ docs/output-formats.md | 141 +++++++++++++++++++++++++++++++++++++++++ 7 files changed, 709 insertions(+) create mode 100644 docs/dns-resolution.md create mode 100644 docs/input-formats.md create mode 100644 docs/ipset-reduce.md create mode 100644 docs/ipv6.md create mode 100644 docs/operations.md create mode 100644 docs/output-formats.md diff --git a/README.md b/README.md index 5bf5535..47f2534 100644 --- a/README.md +++ b/README.md @@ -366,6 +366,7 @@ To skip the man page: `./configure --disable-man` | Directory | Contents | |-----------|----------| | `src/` | C sources and headers | +| `docs/` | Detailed documentation | | `packaging/` | Spec template, ebuild, release tooling | | `tests.d/` | CLI regression tests | | `tests.build.d/` | Build and layout regressions | @@ -373,6 +374,17 @@ To skip the man page: `./configure --disable-man` | `tests.tsan.d/` | TSAN regressions | | `tests.unit/` | Unit-style internal harnesses | +## Documentation + +Detailed guides in the [`docs/`](docs/) directory: + +- [Input formats](docs/input-formats.md) — every accepted format, file lists, directories, binary +- [Output formats](docs/output-formats.md) — CIDR, ranges, single IPs, 
binary, CSV, prefix/suffix +- [Operations](docs/operations.md) — merge, intersect, exclude, diff, reduce, compare, count +- [IPv6 support](docs/ipv6.md) — address family, normalization, cross-family rules +- [DNS resolution](docs/dns-resolution.md) — threading, retry, configuration +- [Optimizing ipsets for iptables](docs/ipset-reduce.md) — prefix reduction with examples + ## Getting help ```bash diff --git a/docs/dns-resolution.md b/docs/dns-resolution.md new file mode 100644 index 0000000..a3893b9 --- /dev/null +++ b/docs/dns-resolution.md @@ -0,0 +1,55 @@ +# DNS resolution + +When input files contain hostnames (one per line), `iprange` resolves them in parallel using a thread pool. + +## Configuration + +| Option | Default | Meaning | +|--------|---------|---------| +| `--dns-threads N` | 5 | Maximum number of parallel DNS queries | +| `--dns-silent` | off | Suppress all DNS error messages | +| `--dns-progress` | off | Show a progress bar during resolution | + +## How it works + +1. As each input line is parsed, hostnames are queued for resolution. +2. Worker threads pick requests from the queue and call `getaddrinfo()`. +3. Resolved IPs are added to a reply queue. +4. The main thread drains the reply queue periodically and after all requests finish. + +Threads are created on demand up to `--dns-threads`. If the queue grows faster than threads can process, new threads are spawned up to the limit. + +## Address family behavior + +| Mode | Records resolved | Normalization | +|------|-----------------|---------------| +| IPv4 (default / `-4`) | A records only | None | +| IPv6 (`-6`) | AAAA and A records | A results mapped to `::ffff:x.x.x.x` | + +In IPv6 mode, a hostname that has both AAAA and A records will contribute all addresses — IPv6 addresses directly, IPv4 addresses as IPv4-mapped IPv6. + +## Retry and error handling + +- **Temporary failures** (`EAI_AGAIN`): retried up to 20 times with 1-second delays between retry cycles. 
+- **Permanent failures** (`EAI_NONAME`, `EAI_FAIL`, etc.): logged to stderr and counted. +- **System errors** (`EAI_SYSTEM`, `EAI_MEMORY`): logged to stderr. + +After all resolutions complete, if any hostname permanently failed, the entire load fails (returns error). Use `--dns-silent` to suppress the per-hostname error messages, but the load will still fail. + +## Hostname detection + +A line is treated as a hostname when: +- It contains only hostname-valid characters (alphanumeric, dot, hyphen, underscore) +- It does not look like a valid IP address or CIDR +- It appears alone on the line (optionally followed by a comment) + +Lines that look like IPs but fail to parse are treated as errors, not hostnames. This prevents typos like `1.2.3.999` from triggering DNS resolution. + +Hostnames cannot appear as range endpoints. A line like `host1.example.com - host2.example.com` is invalid. + +## Performance notes + +- With the default 5 threads, `iprange` can resolve hundreds of hostnames per second. +- For files with thousands of hostnames, increase `--dns-threads` (e.g., 50-100). +- DNS results are added to the ipset as they arrive, so resolution overlaps with continued file parsing. +- Each hostname resolution is independent — one slow or failing hostname does not block others. diff --git a/docs/input-formats.md b/docs/input-formats.md new file mode 100644 index 0000000..cdfa22c --- /dev/null +++ b/docs/input-formats.md @@ -0,0 +1,133 @@ +# Input formats + +`iprange` accepts one entry per line. All formats can coexist in the same file. 
+ +## IPv4 (default mode) + +### Addresses and CIDRs + +| Format | Example | Expansion | +|--------|---------|-----------| +| Dotted decimal | `1.2.3.4` | Single IP | +| CIDR prefix | `1.2.3.0/24` | 1.2.3.0 - 1.2.3.255 | +| Dotted netmask | `1.2.3.0/255.255.255.0` | Same as /24 | +| Abbreviated | `10.1` | `inet_aton()` expansion | +| Decimal integer | `16909060` | 1.2.3.4 | +| Octal | `012.0.0.1` | 10.0.0.1 (leading zero = octal) | +| Hex | `0x0A000001` | 10.0.0.1 | + +IPv4 parsing uses `inet_aton()`, which accepts all the above forms. Be careful with leading zeros — `010.0.0.1` is octal 8.0.0.1, not decimal 10.0.0.1. + +By default, CIDRs are normalized to the network address: `1.1.1.17/24` is read as `1.1.1.0/24`. Use `--dont-fix-network` to disable this. + +The default prefix for bare IPs (no `/` suffix) is /32. Change with `--default-prefix N`. + +### Ranges + +| Format | Example | Meaning | +|--------|---------|---------| +| IP range | `1.2.3.0 - 1.2.3.255` | Explicit start-end | +| CIDR range | `1.2.3.0/24 - 1.2.4.0/24` | Network of first to broadcast of second | +| Mixed | `1.2.3.0/24 - 1.2.4.0/255.255.255.0` | CIDR and netmask can be mixed | + +The dash can have optional spaces around it. + +### Hostnames + +Hostnames (one per line) are resolved via parallel DNS queries. In IPv4 mode, only A records are resolved. If a hostname resolves to multiple IPs, all are added. + +See [DNS resolution](dns-resolution.md) for threading and configuration. + +## IPv6 (`-6` mode) + +### Addresses and CIDRs + +| Format | Example | Notes | +|--------|---------|-------| +| Full notation | `2001:0db8:0000:0000:0000:0000:0000:0001` | | +| Compressed | `2001:db8::1` | Standard `::` compression | +| Loopback | `::1` | | +| CIDR | `2001:db8::/32` | Prefix 0-128 | +| IPv4-mapped | `::ffff:10.0.0.1` | | +| Plain IPv4 | `10.0.0.1` | Auto-normalized to `::ffff:10.0.0.1` | + +IPv6 parsing uses `inet_pton(AF_INET6)`. + +### Ranges + +IPv6 ranges use the same `addr1 - addr2` syntax. 
Both endpoints must be the same address family — a range like `10.0.0.1 - 2001:db8::1` is rejected as a mixed-family error. + +### Hostnames + +In IPv6 mode, hostnames are resolved for both AAAA and A records. A-record results are normalized to IPv4-mapped IPv6 (`::ffff:x.x.x.x`). + +## Comments and whitespace + +- `#` or `;` at the start of a line marks it as a comment. +- `#` or `;` after an IP/range/hostname starts an inline comment (rest of line ignored). +- Empty lines and leading/trailing whitespace are silently skipped. + +## File inputs + +### Regular files + +```bash +iprange file1.txt file2.txt file3.txt +``` + +Each file argument is loaded as a separate ipset. For modes like `--compare`, each file is reported separately, under its own name, in the CSV output. + +### stdin + +```bash +cat blocklist.txt | iprange - +# or just: +cat blocklist.txt | iprange +``` + +If no file arguments are given, stdin is assumed. Explicit `-` reads stdin. + +### File lists (`@filename`) + +```bash +iprange @my-lists.txt +``` + +The file `my-lists.txt` contains one filename per line. Comments (`#`, `;`) and empty lines are ignored. Each listed file is loaded as a separate ipset. + +``` +# my-lists.txt +/path/to/blocklist-a.txt +/path/to/blocklist-b.txt +# /path/to/disabled.txt +``` + +Feature detection: `iprange --has-filelist-loading` exits 0 if supported. + +### Directory loading (`@directory`) + +```bash +iprange @/etc/firehol/ipsets/ +``` + +All regular files in the directory are loaded (sorted alphabetically), each as a separate ipset. Subdirectories are not traversed. + +Feature detection: `iprange --has-directory-loading` exits 0 if supported. 
+ +### Naming for CSV output + +Any file argument can be followed by `as NAME` to override its name in CSV output: + +```bash +iprange --compare --header file1.txt as "Blocklist A" file2.txt as "Blocklist B" +``` + +## Binary input + +Binary files (produced by `--print-binary`) are auto-detected by their header line: +- IPv4 binary: format v1.0 +- IPv6 binary: format v2.0 + +Loading a binary file of the wrong family is an error. In IPv4 mode, an IPv6 binary file is rejected. In IPv6 mode, an IPv4 binary file is rejected. + +Binary files are architecture-specific (no endianness conversion). They are intended as a same-machine cache, not a portable interchange format. diff --git a/docs/ipset-reduce.md b/docs/ipset-reduce.md new file mode 100644 index 0000000..ec32f25 --- /dev/null +++ b/docs/ipset-reduce.md @@ -0,0 +1,129 @@ +# Optimizing ipsets for iptables + +netfilter/iptables `hash:net` ipsets (netsets) are a fast way to manage IP lists for firewall rules. The number of entries in an ipset does not affect lookup performance. However, each **distinct prefix length** in the netset adds one extra lookup per packet. A netset using all 32 possible IPv4 prefixes forces 32 lookups per packet. + +`iprange --ipset-reduce` consolidates prefixes while keeping the matched IP set identical. For example, one /23 entry becomes two /24 entries — same IPs, one fewer prefix. + +## Parameters + +| Option | Default | Purpose | +|--------|---------|---------| +| `--ipset-reduce PERCENT` | 20 | Allow this % increase in entries | +| `--ipset-reduce-entries ENTRIES` | 16384 | Minimum absolute entry cap | + +You enable reduce mode by giving either option. 
The maximum acceptable entries is computed as: + +``` +max(current_entries * (1 + PERCENT / 100), ENTRIES) +``` + +This design works well across all netset sizes: +- Small netsets (hundreds of entries) are scaled up to ENTRIES +- Large netsets (hundreds of thousands) are scaled by PERCENT + +## Algorithm + +The algorithm is optimal: at each step it finds the prefix whose elimination adds the fewest new entries, merges it into the next available prefix, and repeats until the entry limit is reached. Use `-v` to see the elimination steps. + +## Example: country netset + +The GeoLite2 netset for Greece: + +```bash +$ iprange -C --header country_gr.netset +entries,unique_ips +406,6304132 +``` + +406 entries, 6.3 million unique IPs. The prefix breakdown (`-v`): + +``` +prefix /13 counts 1 entries +prefix /14 counts 3 entries +prefix /15 counts 7 entries +prefix /16 counts 42 entries +prefix /17 counts 19 entries +prefix /18 counts 17 entries +prefix /19 counts 21 entries +prefix /20 counts 21 entries +prefix /21 counts 30 entries +prefix /22 counts 50 entries +prefix /23 counts 50 entries +prefix /24 counts 98 entries +prefix /25 counts 4 entries +prefix /27 counts 2 entries +prefix /28 counts 7 entries +prefix /29 counts 25 entries +prefix /31 counts 3 entries +prefix /32 counts 6 entries +``` + +**18 distinct prefixes** = 18 lookups per packet. + +After reduction with `--ipset-reduce 20` (the set is small, so the default 16384-entry floor, not the 20%, is the effective limit): + +```bash +$ iprange -v --ipset-reduce 20 country_gr.netset >/dev/null +Eliminated 15 out of 18 prefixes (3 remain in the final set). + +prefix /21 counts 3028 entries +prefix /24 counts 398 entries +prefix /32 counts 900 entries +``` + +**3 prefixes, 4,326 entries** — same 6.3 million unique IPs. The kernel now does 3 lookups instead of 18. + +With a higher entry cap: + +```bash +$ iprange -v --ipset-reduce 20 --ipset-reduce-entries 50000 country_gr.netset >/dev/null +Eliminated 16 out of 18 prefixes (2 remain in the final set). 
+ +prefix /24 counts 24622 entries +prefix /32 counts 900 entries +``` + +**2 prefixes, 25,522 entries** — one more prefix eliminated thanks to the higher entry budget. + +## Example: large blocklist + +A large blocklist (218,307 entries, 25 prefixes, 765 million IPs): + +```bash +$ iprange -v --ipset-reduce 20 --ipset-reduce-entries 50000 \ + ib_bluetack_level1.netset >/dev/null +Eliminated 17 out of 25 prefixes (8 remain in the final set). + +prefix /16 counts 11118 entries +prefix /20 counts 5216 entries +prefix /24 counts 46718 entries +prefix /26 counts 17902 entries +prefix /27 counts 18123 entries +prefix /28 counts 32637 entries +prefix /29 counts 94802 entries +prefix /32 counts 33570 entries +``` + +From 25 prefixes to 8, entries from 218,307 to 260,086. At 50%: 6 prefixes. At 100%: 5 prefixes. + +## Lossless round-trip + +The reduction is lossless. Piping reduced output back through `iprange` reproduces the original optimized set: + +```bash +iprange --ipset-reduce 100 blocklist.txt | iprange -v >/dev/null +# output is identical to: iprange -v blocklist.txt >/dev/null +``` + +## Typical usage + +```bash +# Moderate reduction (good default) +iprange --ipset-reduce 20 blocklist.txt > reduced.txt + +# Aggressive reduction for small lists +iprange --ipset-reduce 20 --ipset-reduce-entries 50000 country.netset > reduced.txt + +# Generate ipset restore commands from reduced set +iprange --ipset-reduce 20 --print-prefix "add myset " blocklist.txt +``` diff --git a/docs/ipv6.md b/docs/ipv6.md new file mode 100644 index 0000000..1da23e2 --- /dev/null +++ b/docs/ipv6.md @@ -0,0 +1,100 @@ +# IPv6 support + +`iprange` supports IPv6 with the `-6` / `--ipv6` flag. One invocation operates on one address family — there is no mixed-family mode. 
+ +## Address family selection + +| Flag | Behavior | +|------|----------| +| *(none)* | IPv4 mode (default for text input) | +| `-4` / `--ipv4` | Explicit IPv4 mode | +| `-6` / `--ipv6` | IPv6 mode | + +Feature detection for scripts: +```bash +iprange --has-ipv6 && echo "IPv6 supported" +``` + +## Normalization rules + +### IPv6 mode + +- IPv6 input is parsed directly via `inet_pton(AF_INET6)`. +- Plain IPv4 input is accepted and normalized to IPv4-mapped IPv6 (`::ffff:x.x.x.x`). +- Hostnames are resolved for both AAAA and A records. A-record results are normalized to `::ffff:x.x.x.x`. +- The default prefix for bare IPv6 addresses is /128. + +### IPv4 mode + +- IPv4 input is parsed normally. +- IPv4-mapped IPv6 (`::ffff:x.x.x.x`) is recognized and converted back to the IPv4 address. +- All other IPv6 input is dropped. A single summary warning is printed per file: `N IPv6 entries dropped (use -6 for IPv6 mode)`. +- Hostnames are resolved for A records only. + +## Cross-family rules + +- Operations between IPv4 and IPv6 datasets are not supported. Each invocation works with one family. +- Mixed-family range endpoints (e.g., `10.0.0.1 - 2001:db8::1`) are rejected. +- Binary files declare their family in the header. Loading a binary file of the wrong family is an error. + +## Supported operations + +All operations available in IPv4 mode work identically in IPv6 mode: + +- Merge / union +- Intersection +- Complement / exclude +- Symmetric difference +- Prefix reduction (`--ipset-reduce`) +- All comparison and counting modes +- All output formats (CIDR, ranges, single IPs, binary, prefix/suffix) + +### Prefix control for IPv6 + +- `--min-prefix N`: restrict to prefixes N through 128 +- `--prefixes N,N,N,...`: allow only specific prefix lengths (128 always enabled) + +### Single IP cap + +The `--print-single-ips` safety cap (16,777,216 IPs) applies in IPv6 mode too. Ranges exceeding this are skipped with a warning. 
+ +## Binary format + +IPv6 uses binary format v2.0 (IPv4 uses v1.0). The formats are not interchangeable: + +| Format | Family | Address size | Header | +|--------|--------|-------------|--------| +| v1.0 | IPv4 | 4 bytes per address | `iprange binary format v1.0` | +| v2.0 | IPv6 | 16 bytes per address | `iprange binary format v2.0` | + +Binary files are auto-detected by their header. Same-architecture only (no endianness conversion). + +## Examples + +```bash +# Merge IPv6 blocklists +iprange -6 v6-list1.txt v6-list2.txt + +# IPv4 input normalized to mapped IPv6 +echo "10.0.0.1" | iprange -6 +# output: ::ffff:a00:1/128 + +# Count unique IPv6 addresses +iprange -6 -C --header v6-list.txt + +# Find common IPs between IPv6 lists +iprange -6 --common list1-v6.txt list2-v6.txt + +# Binary cache for IPv6 +iprange -6 --print-binary large-v6.txt > cache.bin +iprange -6 cache.bin + +# Compare IPv6 blocklists +iprange -6 --compare --header v6-a.txt v6-b.txt v6-c.txt +``` + +## Internal representation + +IPv6 addresses are stored as `__uint128_t` (128-bit unsigned integer). This requires GCC or Clang with `__uint128_t` support. The `configure` script checks for this at build time. + +All set operations (merge, common, exclude, diff, optimize) have dedicated IPv6 implementations that operate on 128-bit address pairs, following the same algorithms as the IPv4 versions. diff --git a/docs/operations.md b/docs/operations.md new file mode 100644 index 0000000..a8e0200 --- /dev/null +++ b/docs/operations.md @@ -0,0 +1,139 @@ +# Operations + +All operations produce sorted, non-overlapping output. The output family matches the input family (IPv4 or IPv6). + +## Merge / Union (default) + +Merge all inputs into one optimized set. This is the default when no mode option is given. + +```bash +iprange file1.txt file2.txt file3.txt +``` + +Overlapping and adjacent ranges are combined. Duplicates are eliminated. The result is the union of all input sets. 
+ +Aliases: `--optimize`, `--combine`, `--merge`, `--union`, `--union-all`, `-J` + +## Intersection (`--common`) + +Print only the IPs that appear in **all** input files. + +```bash +iprange --common file1.txt file2.txt file3.txt +``` + +If any file has no overlap with the others, the output is empty. + +Aliases: `--common`, `--intersect`, `--intersect-all` + +## Complement / Exclude (`--except`) + +This is a **positional** operation. Files before `--except` form set A; files after form set B. The output is A minus B. + +```bash +iprange allow.txt --except deny.txt +``` + +Multiple files can appear on either side: + +```bash +iprange a1.txt a2.txt --except b1.txt b2.txt b3.txt +``` + +Files before `--except` are merged into A. Each file after is subtracted from A sequentially. + +Aliases: `--except`, `--exclude-next`, `--complement`, `--complement-next` + +## Symmetric Difference (`--diff`) + +This is a **positional** operation. Files before `--diff` form set A; files after form set B. The output is the IPs in A or B but **not both** (the XOR). + +```bash +iprange before.txt --diff after.txt +``` + +**Exit code**: 0 if the sets are identical (no output), 1 if there are differences. + +Use `--quiet` to suppress the output and only check the exit code: + +```bash +iprange old.txt --diff new.txt --quiet +if [ $? -eq 1 ]; then + echo "sets differ" +fi +``` + +Aliases: `--diff`, `--diff-next` + +## Reduce Prefixes (`--ipset-reduce`) + +Merge all inputs, then reduce the number of distinct CIDR prefixes. The matched IP set remains identical — only the CIDR representation changes. + +```bash +iprange --ipset-reduce 20 blocklist.txt +``` + +See [Optimizing ipsets for iptables](ipset-reduce.md) for a detailed explanation with examples. 
+ +| Option | Default | Meaning | +|--------|---------|---------| +| `--ipset-reduce PERCENT` | 20 | Allow this % increase in entries | +| `--ipset-reduce-entries ENTRIES` | 16384 | Minimum acceptable entry count | + +Aliases: `--reduce-factor`, `--reduce-entries` + +## Compare (CSV modes) + +These modes produce CSV output for analyzing overlap between IP sets. Add `--header` to include column names. + +### Compare all (`--compare`) + +Compare every file with every other file: + +```bash +iprange --compare --header file1.txt file2.txt file3.txt +``` + +Output columns: `name1,name2,entries1,entries2,ips1,ips2,combined_ips,common_ips` + +### Compare first (`--compare-first`) + +Compare the first file against each subsequent file: + +```bash +iprange --compare-first --header reference.txt other1.txt other2.txt +``` + +Output columns: `name,entries,unique_ips,common_ips` + +### Compare next (`--compare-next`) + +**Positional**: compare files before the option against files after: + +```bash +iprange file1.txt file2.txt --compare-next file3.txt file4.txt +``` + +Output columns: same as `--compare` + +## Count Unique (CSV modes) + +### Count merged (`--count-unique` / `-C`) + +Merge all inputs and print one CSV line with totals: + +```bash +iprange -C --header blocklist.txt +``` + +Output columns: `entries,unique_ips` + +### Count per file (`--count-unique-all`) + +Print one CSV line per input file (no merging): + +```bash +iprange --count-unique-all --header file1.txt file2.txt +``` + +Output columns: `name,entries,unique_ips` diff --git a/docs/output-formats.md b/docs/output-formats.md new file mode 100644 index 0000000..69891c8 --- /dev/null +++ b/docs/output-formats.md @@ -0,0 +1,141 @@ +# Output formats + +## CIDR (default) + +The default output is sorted, non-overlapping CIDR blocks: + +``` +10.0.0.0/24 +10.0.1.0/25 +10.0.1.128/26 +``` + +This is the optimal representation — the fewest entries that cover the exact set of IPs. 
+ +### Controlling CIDR prefixes + +**Minimum prefix** — restrict the largest block size: + +```bash +# Only /24 to /32 (no blocks larger than /24) +iprange --min-prefix 24 blocklist.txt +``` + +A /16 network would be expressed as 256 /24 entries. Warning: misuse can produce very large output. + +**Specific prefixes** — allow only certain prefix lengths: + +```bash +# Only /16, /24, and /32 +iprange --prefixes 16,24,32 blocklist.txt +``` + +Prefix /32 is always enabled regardless of settings. + +For IPv6, the same options work with prefix range 0-128. + +## Ranges (`--print-ranges` / `-j`) + +Print start-end ranges: + +``` +10.0.0.0-10.0.0.255 +10.0.2.0-10.0.2.191 +``` + +Single IPs print as ranges with identical endpoints: `10.0.0.5-10.0.0.5` + +## Single IPs (`--print-single-ips` / `-1`) + +Enumerate every individual IP address: + +``` +10.0.0.0 +10.0.0.1 +10.0.0.2 +... +``` + +**Safety cap**: ranges larger than 16,777,216 IPs (256^3) are skipped with a warning to stderr. This prevents unbounded output from large ranges like `0.0.0.0/0`. The cap applies to both IPv4 and IPv6 modes. + +## Binary (`--print-binary`) + +Machine-readable binary format for fast round-trips: + +```bash +# Save +iprange --print-binary blocklist.txt > cache.bin + +# Load +iprange cache.bin +``` + +Binary files include: +- A header line identifying the format version (v1.0 for IPv4, v2.0 for IPv6) +- Metadata: family, optimization flag, record count, unique IP count +- An endianness marker +- Raw address-pair records + +Binary files are **architecture-specific** — they use native byte order and are intended as a same-machine cache. Do not transfer between machines with different endianness. 
+ +## CSV output + +CSV output is produced by the comparison and counting modes: + +| Mode | Columns | +|------|---------| +| `--compare` | name1, name2, entries1, entries2, ips1, ips2, combined_ips, common_ips | +| `--compare-first` | name, entries, unique_ips, common_ips | +| `--compare-next` | name1, name2, entries1, entries2, ips1, ips2, combined_ips, common_ips | +| `--count-unique` | entries, unique_ips | +| `--count-unique-all` | name, entries, unique_ips | + +Add `--header` to print the column header as the first line. + +## Prefix and suffix strings + +Customize output lines with arbitrary strings before and after each entry. This is useful for generating ipset restore commands, iptables rules, or other tool-specific formats. + +### Basic usage + +```bash +# Add prefix to every line +iprange --print-prefix "add myset " blocklist.txt +# Output: add myset 10.0.0.0/24 + +# Add suffix to every line +iprange --print-suffix " timeout 3600" blocklist.txt +# Output: 10.0.0.0/24 timeout 3600 +``` + +### Separate handling for IPs and networks + +Single IPs (/32 entries) and networks (other prefixes) can have different prefixes and suffixes. This is useful when single IPs and networks go into different ipsets: + +```bash +iprange \ + --print-prefix-ips "add single-ips " \ + --print-prefix-nets "add networks " \ + blocklist.txt +# Output: +# add networks 10.0.0.0/24 +# add single-ips 10.0.1.5 +``` + +| Option | Applies to | +|--------|------------| +| `--print-prefix STRING` | Both IPs and networks | +| `--print-prefix-ips STRING` | Single IPs only (/32) | +| `--print-prefix-nets STRING` | Networks only (not /32) | +| `--print-suffix STRING` | Both IPs and networks | +| `--print-suffix-ips STRING` | Single IPs only (/32) | +| `--print-suffix-nets STRING` | Networks only (not /32) | + +## Quiet mode + +Use `--quiet` with `--diff` to suppress output and only use the exit code: + +```bash +iprange old.txt --diff new.txt --quiet +echo $? 
# 0 = identical, 1 = different +``` From 2885de4261d330f4a10c13e376edc77c259ae0b2 Mon Sep 17 00:00:00 2001 From: Costa Tsaousis Date: Sat, 4 Apr 2026 19:32:29 +0300 Subject: [PATCH 7/9] Move docs into wiki/ with GitHub Action sync MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Rename docs/ to wiki/ — source of truth for wiki content - Add wiki/Home.md as the wiki landing page with navigation - Add .github/workflows/sync-wiki.yml to auto-publish wiki/ to the GitHub wiki on push to master - Update README.md links to wiki/ directory The wiki at github.com/firehol/iprange/wiki is now a mirror of the wiki/ directory in this repo. Edit wiki/ in the repo, and the GitHub Action syncs it to the wiki on merge to master. --- .github/workflows/sync-wiki.yml | 37 +++++++++++++++++ README.md | 16 ++++---- wiki/Home.md | 69 ++++++++++++++++++++++++++++++++ {docs => wiki}/dns-resolution.md | 0 {docs => wiki}/input-formats.md | 0 {docs => wiki}/ipset-reduce.md | 0 {docs => wiki}/ipv6.md | 0 {docs => wiki}/operations.md | 0 {docs => wiki}/output-formats.md | 0 9 files changed, 114 insertions(+), 8 deletions(-) create mode 100644 .github/workflows/sync-wiki.yml create mode 100644 wiki/Home.md rename {docs => wiki}/dns-resolution.md (100%) rename {docs => wiki}/input-formats.md (100%) rename {docs => wiki}/ipset-reduce.md (100%) rename {docs => wiki}/ipv6.md (100%) rename {docs => wiki}/operations.md (100%) rename {docs => wiki}/output-formats.md (100%) diff --git a/.github/workflows/sync-wiki.yml b/.github/workflows/sync-wiki.yml new file mode 100644 index 0000000..c3ac510 --- /dev/null +++ b/.github/workflows/sync-wiki.yml @@ -0,0 +1,37 @@ +name: Sync wiki + +on: + push: + branches: [master] + paths: + - 'wiki/**' + +jobs: + sync: + runs-on: ubuntu-latest + permissions: + contents: write + steps: + - name: Checkout repo + uses: actions/checkout@v4 + + - name: Checkout wiki + uses: actions/checkout@v4 + with: + repository: ${{ 
github.repository }}.wiki + path: wiki-repo + token: ${{ secrets.GITHUB_TOKEN }} + + - name: Sync wiki files + run: | + find wiki-repo -maxdepth 1 -name '*.md' -delete && cp wiki/*.md wiki-repo/ # true mirror: pages deleted from wiki/ are removed from the wiki too (git add -A stages the deletions) + cd wiki-repo + git config user.name "github-actions[bot]" + git config user.email "github-actions[bot]@users.noreply.github.com" + git add -A + if git diff --cached --quiet; then + echo "No wiki changes to sync" + else + git commit -m "Sync wiki from repo" + git push + fi diff --git a/README.md b/README.md index 47f2534..4cf3d11 100644 --- a/README.md +++ b/README.md @@ -366,7 +366,7 @@ To skip the man page: `./configure --disable-man` | Directory | Contents | |-----------|----------| | `src/` | C sources and headers | -| `docs/` | Detailed documentation | +| `wiki/` | Documentation (synced to GitHub wiki) | | `packaging/` | Spec template, ebuild, release tooling | | `tests.d/` | CLI regression tests | | `tests.build.d/` | Build and layout regressions | @@ -376,14 +376,14 @@ To skip the man page: `./configure --disable-man` ## Documentation -Detailed guides in the [`docs/`](docs/) directory: +Detailed guides in the [`wiki/`](wiki/) directory (also published to the [GitHub wiki](https://github.com/firehol/iprange/wiki)): -- [Input formats](docs/input-formats.md) — every accepted format, file lists, directories, binary -- [Output formats](docs/output-formats.md) — CIDR, ranges, single IPs, binary, CSV, prefix/suffix -- [Operations](docs/operations.md) — merge, intersect, exclude, diff, reduce, compare, count -- [IPv6 support](docs/ipv6.md) — address family, normalization, cross-family rules -- [DNS resolution](docs/dns-resolution.md) — threading, retry, configuration -- [Optimizing ipsets for iptables](docs/ipset-reduce.md) — prefix reduction with examples +- [Input formats](wiki/input-formats.md) — every accepted format, file lists, directories, binary +- [Output formats](wiki/output-formats.md) — CIDR, ranges, single IPs, binary, CSV, prefix/suffix +- [Operations](wiki/operations.md) — merge, intersect, exclude, diff, 
reduce, compare, count +- [IPv6 support](wiki/ipv6.md) — address family, normalization, cross-family rules +- [DNS resolution](wiki/dns-resolution.md) — threading, retry, configuration +- [Optimizing ipsets for iptables](wiki/ipset-reduce.md) — prefix reduction with examples ## Getting help diff --git a/wiki/Home.md b/wiki/Home.md new file mode 100644 index 0000000..fb1ba7f --- /dev/null +++ b/wiki/Home.md @@ -0,0 +1,69 @@ +# iprange + +`iprange` is a fast command-line tool for managing IPv4 and IPv6 address sets. It reads IPs, CIDRs, ranges, and hostnames, normalizes them into optimal non-overlapping sets, and performs set operations: union, intersection, difference, complement, comparison, and prefix reduction. + +For 1 million input lines, a merge completes in under a second. + +## Documentation + +- [Input formats](input-formats.md) — every accepted format, file lists, directories, binary input +- [Output formats](output-formats.md) — CIDR, ranges, single IPs, binary, CSV, prefix/suffix strings +- [Operations](operations.md) — merge, intersect, exclude, diff, reduce, compare, count +- [IPv6 support](ipv6.md) — address family selection, normalization, cross-family rules +- [DNS resolution](dns-resolution.md) — parallel threading, retry, configuration +- [Optimizing ipsets for iptables](ipset-reduce.md) — prefix reduction tutorial with examples + +## Quick reference + +``` +iprange [options] file1 file2 ... 
+``` + +### Address family + +| Flag | Mode | +|------|------| +| *(default)* | IPv4 | +| `-4` / `--ipv4` | Explicit IPv4 | +| `-6` / `--ipv6` | IPv6 (accepts both IPv6 and IPv4, normalizes IPv4 to `::ffff:x.x.x.x`) | + +### Operations + +| Option | Operation | Output | +|--------|-----------|--------| +| *(default)* | Union / merge | CIDR | +| `--common` | Intersection | CIDR | +| `--except` | A minus B (positional) | CIDR | +| `--diff` | Symmetric difference (positional) | CIDR | +| `--ipset-reduce N` | Merge + reduce prefixes | CIDR | +| `--compare` | All vs all | CSV | +| `--compare-first` | First vs rest | CSV | +| `--compare-next` | Group vs group (positional) | CSV | +| `--count-unique` / `-C` | Merged counts | CSV | +| `--count-unique-all` | Per-file counts | CSV | + +### Output format + +| Option | Format | +|--------|--------| +| *(default)* | CIDR (`10.0.0.0/24`) | +| `-j` / `--print-ranges` | Ranges (`10.0.0.0-10.0.0.255`) | +| `-1` / `--print-single-ips` | One IP per line | +| `--print-binary` | Binary (same-architecture cache) | + +### Feature detection + +```bash +iprange --has-compare # compare modes +iprange --has-reduce # reduce mode +iprange --has-filelist-loading # @filename support +iprange --has-directory-loading # @directory support +iprange --has-ipv6 # IPv6 support +``` + +Each exits 0 if the feature is present. 
+ +## Related projects + +- [FireHOL IP Lists](https://iplists.firehol.org) — curated collection of IP blocklists, updated daily using `iprange` +- [FireHOL](https://github.com/firehol/firehol) — Linux firewall tool that uses `iprange` for ipset management diff --git a/docs/dns-resolution.md b/wiki/dns-resolution.md similarity index 100% rename from docs/dns-resolution.md rename to wiki/dns-resolution.md diff --git a/docs/input-formats.md b/wiki/input-formats.md similarity index 100% rename from docs/input-formats.md rename to wiki/input-formats.md diff --git a/docs/ipset-reduce.md b/wiki/ipset-reduce.md similarity index 100% rename from docs/ipset-reduce.md rename to wiki/ipset-reduce.md diff --git a/docs/ipv6.md b/wiki/ipv6.md similarity index 100% rename from docs/ipv6.md rename to wiki/ipv6.md diff --git a/docs/operations.md b/wiki/operations.md similarity index 100% rename from docs/operations.md rename to wiki/operations.md diff --git a/docs/output-formats.md b/wiki/output-formats.md similarity index 100% rename from docs/output-formats.md rename to wiki/output-formats.md From 97bcd44b7787f4b0a36b80025070cb645e9d4740 Mon Sep 17 00:00:00 2001 From: Costa Tsaousis Date: Sat, 4 Apr 2026 19:51:15 +0300 Subject: [PATCH 8/9] Split operations into individual wiki pages with verified examples Replace the monolithic operations.md with one page per operation: merge, intersect, exclude, diff, reduce, compare, count-unique. Every example in every page was run against the built iprange binary and the output copied verbatim into the documentation. 
--- README.md | 11 ++-- wiki/Home.md | 15 ++++- wiki/compare.md | 67 +++++++++++++++++++++ wiki/count-unique.md | 61 +++++++++++++++++++ wiki/diff.md | 58 ++++++++++++++++++ wiki/exclude.md | 80 +++++++++++++++++++++++++ wiki/intersect.md | 39 ++++++++++++ wiki/merge.md | 79 ++++++++++++++++++++++++ wiki/operations.md | 139 ------------------------------------------- wiki/reduce.md | 76 +++++++++++++++++++++++ 10 files changed, 478 insertions(+), 147 deletions(-) create mode 100644 wiki/compare.md create mode 100644 wiki/count-unique.md create mode 100644 wiki/diff.md create mode 100644 wiki/exclude.md create mode 100644 wiki/intersect.md create mode 100644 wiki/merge.md delete mode 100644 wiki/operations.md create mode 100644 wiki/reduce.md diff --git a/README.md b/README.md index 4cf3d11..92fcb9d 100644 --- a/README.md +++ b/README.md @@ -378,12 +378,11 @@ To skip the man page: `./configure --disable-man` Detailed guides in the [`wiki/`](wiki/) directory (also published to the [GitHub wiki](https://github.com/firehol/iprange/wiki)): -- [Input formats](wiki/input-formats.md) — every accepted format, file lists, directories, binary -- [Output formats](wiki/output-formats.md) — CIDR, ranges, single IPs, binary, CSV, prefix/suffix -- [Operations](wiki/operations.md) — merge, intersect, exclude, diff, reduce, compare, count -- [IPv6 support](wiki/ipv6.md) — address family, normalization, cross-family rules -- [DNS resolution](wiki/dns-resolution.md) — threading, retry, configuration -- [Optimizing ipsets for iptables](wiki/ipset-reduce.md) — prefix reduction with examples +**Operations** — one page per operation with verified examples: +[Merge](wiki/merge.md) | [Intersection](wiki/intersect.md) | [Exclude](wiki/exclude.md) | [Diff](wiki/diff.md) | [Reduce](wiki/reduce.md) | [Compare](wiki/compare.md) | [Count](wiki/count-unique.md) + +**Reference:** +[Input formats](wiki/input-formats.md) | [Output formats](wiki/output-formats.md) | [IPv6](wiki/ipv6.md) | [DNS 
resolution](wiki/dns-resolution.md) | [Ipset optimization](wiki/ipset-reduce.md) ## Getting help diff --git a/wiki/Home.md b/wiki/Home.md index fb1ba7f..100109b 100644 --- a/wiki/Home.md +++ b/wiki/Home.md @@ -6,12 +6,23 @@ For 1 million input lines, a merge completes in under a second. ## Documentation +### Operations + +- [Merge / Union](merge.md) — merge all inputs into one optimized set (default mode) +- [Intersection](intersect.md) — find IPs common to all inputs +- [Complement / Exclude](exclude.md) — remove one set from another +- [Symmetric Difference](diff.md) — find IPs in either set but not both +- [Reduce Prefixes](reduce.md) — reduce CIDR prefix diversity for firewall performance +- [Compare](compare.md) — compare sets pairwise as CSV (all, first, next) +- [Count Unique](count-unique.md) — count entries and unique IPs as CSV + +### Reference + - [Input formats](input-formats.md) — every accepted format, file lists, directories, binary input - [Output formats](output-formats.md) — CIDR, ranges, single IPs, binary, CSV, prefix/suffix strings -- [Operations](operations.md) — merge, intersect, exclude, diff, reduce, compare, count - [IPv6 support](ipv6.md) — address family selection, normalization, cross-family rules - [DNS resolution](dns-resolution.md) — parallel threading, retry, configuration -- [Optimizing ipsets for iptables](ipset-reduce.md) — prefix reduction tutorial with examples +- [Optimizing ipsets for iptables](ipset-reduce.md) — extended tutorial with real-world examples ## Quick reference diff --git a/wiki/compare.md b/wiki/compare.md new file mode 100644 index 0000000..4a622a5 --- /dev/null +++ b/wiki/compare.md @@ -0,0 +1,67 @@ +# Compare + +Compare IP sets pairwise and print CSV with overlap statistics. + +Three comparison modes are available: + +## Compare all (`--compare`) + +Compare every file with every other file. 
+ +``` +# list-a.txt # list-b.txt # list-c.txt +10.0.0.0/24 10.0.0.128/25 10.0.0.0/16 +10.0.1.0/24 10.0.2.0/24 172.16.0.0/12 +192.168.1.0/24 192.168.1.0/24 +``` + +``` +$ iprange --compare --header list-a.txt list-b.txt list-c.txt +name1,name2,entries1,entries2,ips1,ips2,combined_ips,common_ips +list-a.txt,list-b.txt,2,3,768,640,1024,384 +list-a.txt,list-c.txt,2,2,768,1114112,1114368,512 +list-b.txt,list-c.txt,3,2,640,1114112,1114368,384 +``` + +**Columns**: name1, name2, entries in each, unique IPs in each, combined (union) IPs, common (intersection) IPs. + +## Compare first (`--compare-first`) + +Compare the first file against each subsequent file. + +``` +$ iprange --compare-first --header list-a.txt list-b.txt list-c.txt +name,entries,unique_ips,common_ips +list-b.txt,3,640,384 +list-c.txt,2,1114112,512 +``` + +This is useful for checking how much of a reference list overlaps with each of several other lists. + +## Compare next (`--compare-next`) + +**Positional**: compare files before the option against files after it. + +``` +$ iprange list-a.txt --compare-next list-b.txt list-c.txt --header +name1,name2,entries1,entries2,ips1,ips2,combined_ips,common_ips +list-a.txt,list-b.txt,2,3,768,640,1024,384 +list-a.txt,list-c.txt,2,2,768,1114112,1114368,512 +``` + +Only `list-a.txt` (before `--compare-next`) is compared against `list-b.txt` and `list-c.txt` (after it). + +## Naming files in CSV output + +Use `as NAME` after a filename to set a custom name in the CSV: + +``` +$ iprange --count-unique-all --header list-a.txt as "Blocklist A" list-b.txt as "Blocklist B" +name,entries,unique_ips +Blocklist A,2,768 +Blocklist B,3,640 +``` + +## CSV header + +All CSV modes default to no header. Add `--header` to include column names as the first row. diff --git a/wiki/count-unique.md b/wiki/count-unique.md new file mode 100644 index 0000000..9a95419 --- /dev/null +++ b/wiki/count-unique.md @@ -0,0 +1,61 @@ +# Count Unique + +Print entry counts and unique IP counts as CSV. 
+ +Two counting modes are available: + +## Count merged (`--count-unique` / `-C`) + +Merge all inputs and print a single CSV line with the totals: + +``` +# list-a.txt # list-b.txt +10.0.0.0/24 10.0.0.128/25 +10.0.1.0/24 10.0.2.0/24 +192.168.1.0/24 192.168.1.0/24 +``` + +``` +$ iprange -C --header list-a.txt list-b.txt +entries,unique_ips +2,1024 +``` + +The merged set has 2 entries (ranges) covering 1024 unique IPs. + +## Count per file (`--count-unique-all`) + +Print one CSV line per input file without merging: + +``` +$ iprange --count-unique-all --header list-a.txt list-b.txt list-c.txt +name,entries,unique_ips +list-a.txt,2,768 +list-b.txt,3,640 +list-c.txt,2,1114112 +``` + +## Naming files in CSV output + +Use `as NAME` to customize the name column: + +``` +$ iprange --count-unique-all --header list-a.txt as "Blocklist A" list-b.txt as "Blocklist B" +name,entries,unique_ips +Blocklist A,2,768 +Blocklist B,3,640 +``` + +## IPv6 + +``` +$ printf '2001:db8::/32\n' | iprange -6 -C --header +entries,unique_ips +1,79228162514264337593543950336 +``` + +IPv6 unique IP counts are printed as full 128-bit decimal numbers. + +## CSV header + +Add `--header` to include column names. Without it, only data lines are printed. diff --git a/wiki/diff.md b/wiki/diff.md new file mode 100644 index 0000000..39c3fcd --- /dev/null +++ b/wiki/diff.md @@ -0,0 +1,58 @@ +# Symmetric Difference + +Print the IPs that exist in either A or B, but **not both**. + +**Aliases**: `--diff`, `--diff-next` + +## How it works + +This is a **positional** operation: +1. All files before `--diff` are merged into set **A** +2. All files after `--diff` are merged into set **B** +3. The output is (A - B) union (B - A) — the XOR of the two sets + +**Exit code**: 0 if the sets are identical (no output), 1 if there are differences. This makes `--diff` useful in scripts to detect changes. 
+ +## Examples + +Compare two versions of a blocklist: + +``` +# before.txt # after.txt +10.0.0.0/24 10.0.0.0/24 +10.0.1.0/24 10.0.1.0/25 # shrunk +10.0.2.0/24 10.0.2.0/24 + 10.0.3.0/24 # added +``` + +``` +$ iprange before.txt --diff after.txt +10.0.1.128/25 +10.0.3.0/24 +``` + +The output shows `10.0.1.128/25` (the upper half of the /24 that was shrunk to a /25) and `10.0.3.0/24` (newly added). The two entries that remained identical are excluded. + +**Exit code check**: + +``` +$ iprange before.txt --diff after.txt +10.0.1.128/25 +10.0.3.0/24 +$ echo $? +1 +``` + +``` +$ iprange before.txt --diff before.txt +$ echo $? +0 +``` + +**Quiet mode** — suppress output, only check exit code: + +``` +$ iprange before.txt --diff after.txt --quiet +$ echo $? +1 +``` diff --git a/wiki/exclude.md b/wiki/exclude.md new file mode 100644 index 0000000..ddbdc59 --- /dev/null +++ b/wiki/exclude.md @@ -0,0 +1,80 @@ +# Complement / Exclude + +Merge all files before `--except`, then remove all IPs matched by the files after it. + +**Aliases**: `--except`, `--exclude-next`, `--complement`, `--complement-next` + +## How it works + +This is a **positional** operation: +1. All files before `--except` are merged into set **A** +2. Each file after `--except` is subtracted from A, one by one +3. 
The result is the IPs in A that are not in any of the subtracted sets + +## Examples + +Remove specific entries from a whitelist: + +``` +# allow.txt # deny.txt +10.0.0.0/8 10.0.0.0/24 +172.16.0.0/12 172.16.0.0/16 +192.168.0.0/16 192.168.1.100 +``` + +``` +$ iprange allow.txt --except deny.txt +10.0.1.0/24 +10.0.2.0/23 +10.0.4.0/22 +10.0.8.0/21 +10.0.16.0/20 +10.0.32.0/19 +10.0.64.0/18 +10.0.128.0/17 +10.1.0.0/16 +10.2.0.0/15 +10.4.0.0/14 +10.8.0.0/13 +10.16.0.0/12 +10.32.0.0/11 +10.64.0.0/10 +10.128.0.0/9 +172.17.0.0/16 +172.18.0.0/15 +172.20.0.0/14 +172.24.0.0/13 +192.168.0.0/24 +192.168.1.0/26 +192.168.1.64/27 +192.168.1.96/30 +192.168.1.101 +192.168.1.102/31 +192.168.1.104/29 +192.168.1.112/28 +192.168.1.128/25 +192.168.2.0/23 +192.168.4.0/22 +192.168.8.0/21 +192.168.16.0/20 +192.168.32.0/19 +192.168.64.0/18 +192.168.128.0/17 +``` + +The `10.0.0.0/24` was carved out of `10.0.0.0/8`, leaving the remaining address space as multiple CIDRs. The single IP `192.168.1.100` was punched out of `192.168.0.0/16`, creating a gap around it. + +## IPv6 + +``` +$ printf '2001:db8::/32\n' > all.txt +$ printf '2001:db8:1::/48\n' > remove.txt +$ iprange -6 all.txt --except remove.txt | head -5 +2001:db8::/48 +2001:db8:2::/47 +2001:db8:4::/46 +2001:db8:8::/45 +2001:db8:10::/44 +``` + +Carving a /48 out of a /32 leaves 16 CIDR blocks covering the remaining address space. diff --git a/wiki/intersect.md b/wiki/intersect.md new file mode 100644 index 0000000..487f2f3 --- /dev/null +++ b/wiki/intersect.md @@ -0,0 +1,39 @@ +# Intersection + +Print only the IPs that appear in **all** input files. + +**Aliases**: `--common`, `--intersect`, `--intersect-all` + +## How it works + +Each input file is optimized, then the intersection is computed pairwise. An IP appears in the output only if it is covered by every input file. If the files have no overlap, the output is empty. 
+ +## Examples + +Find IPs common to two blocklists: + +``` +# list-a.txt # list-b.txt +10.0.0.0/24 10.0.0.128/25 +10.0.1.0/24 10.0.2.0/24 +192.168.1.0/24 192.168.1.0/24 +``` + +``` +$ iprange --common list-a.txt list-b.txt +10.0.0.128/25 +192.168.1.0/24 +``` + +Only the upper half of `10.0.0.0/24` (which is `10.0.0.128/25`) overlaps with `list-b.txt`'s `10.0.0.128/25`. `192.168.1.0/24` is in both files. `10.0.1.0/24` and `10.0.2.0/24` have no overlap and are excluded. + +## IPv6 + +``` +$ printf '2001:db8::/32\n' > v6-a.txt +$ printf '2001:db8:1::/48\n2001:db9::/32\n' > v6-b.txt +$ iprange -6 --common v6-a.txt v6-b.txt +2001:db8:1::/48 +``` + +The `/32` in v6-a.txt contains the `/48` from v6-b.txt. The `2001:db9::/32` in v6-b.txt does not overlap. diff --git a/wiki/merge.md b/wiki/merge.md new file mode 100644 index 0000000..7936cb3 --- /dev/null +++ b/wiki/merge.md @@ -0,0 +1,79 @@ +# Merge / Union + +Merge all inputs into one sorted, deduplicated, non-overlapping set. This is the default operation when no mode flag is given. + +**Aliases**: `--optimize`, `--combine`, `--merge`, `--union`, `--union-all`, `-J` + +## How it works + +All input entries are combined, then sorted and optimized: overlapping ranges are merged, adjacent ranges are combined, and duplicates are eliminated. The result is the smallest set of non-overlapping CIDRs that covers exactly the same IPs. 
+ +## Examples + +Normalize a file with mixed input formats: + +``` +# input: mixed.txt +# Blocklist from multiple sources +1.2.3.4 +10.0.0.0/24 +10.0.0.200 - 10.0.1.50 +; another comment +192.168.1.0/255.255.255.0 +``` + +``` +$ iprange mixed.txt +1.2.3.4 +10.0.0.0/24 +10.0.1.0/27 +10.0.1.32/28 +10.0.1.48/31 +10.0.1.50 +192.168.1.0/24 +``` + +Merge two overlapping files: + +``` +# list-a.txt # list-b.txt +10.0.0.0/24 10.0.0.128/25 +10.0.1.0/24 10.0.2.0/24 +192.168.1.0/24 192.168.1.0/24 +``` + +``` +$ iprange list-a.txt list-b.txt +10.0.0.0/23 +10.0.2.0/24 +192.168.1.0/24 +``` + +The two /24 networks `10.0.0.0/24` and `10.0.1.0/24` merged into one /23. The duplicate `192.168.1.0/24` was deduplicated. `10.0.0.128/25` was absorbed by `10.0.0.0/24`. + +Merge from stdin: + +``` +$ printf '10.0.0.5\n10.0.0.6\n10.0.0.7\n10.0.0.8\n' | iprange +10.0.0.5 +10.0.0.6/31 +10.0.0.8 +``` + +Four individual IPs consolidated into optimal CIDRs: one /31 block plus two singles. + +## IPv6 + +``` +$ printf '2001:db8::1\n2001:db8::2\n2001:db8::3\n2001:db8::4\n' | iprange -6 +2001:db8::1 +2001:db8::2/127 +2001:db8::4 +``` + +``` +$ printf '2001:db8::/48\n2001:db8:1::/48\n' | iprange -6 +2001:db8::/47 +``` + +Two adjacent /48 networks merge into one /47. diff --git a/wiki/operations.md b/wiki/operations.md deleted file mode 100644 index a8e0200..0000000 --- a/wiki/operations.md +++ /dev/null @@ -1,139 +0,0 @@ -# Operations - -All operations produce sorted, non-overlapping output. The output family matches the input family (IPv4 or IPv6). - -## Merge / Union (default) - -Merge all inputs into one optimized set. This is the default when no mode option is given. - -```bash -iprange file1.txt file2.txt file3.txt -``` - -Overlapping and adjacent ranges are combined. Duplicates are eliminated. The result is the union of all input sets. 
- -Aliases: `--optimize`, `--combine`, `--merge`, `--union`, `--union-all`, `-J` - -## Intersection (`--common`) - -Print only the IPs that appear in **all** input files. - -```bash -iprange --common file1.txt file2.txt file3.txt -``` - -If any file has no overlap with the others, the output is empty. - -Aliases: `--common`, `--intersect`, `--intersect-all` - -## Complement / Exclude (`--except`) - -This is a **positional** operation. Files before `--except` form set A; files after form set B. The output is A minus B. - -```bash -iprange allow.txt --except deny.txt -``` - -Multiple files can appear on either side: - -```bash -iprange a1.txt a2.txt --except b1.txt b2.txt b3.txt -``` - -Files before `--except` are merged into A. Each file after is subtracted from A sequentially. - -Aliases: `--except`, `--exclude-next`, `--complement`, `--complement-next` - -## Symmetric Difference (`--diff`) - -This is a **positional** operation. Files before `--diff` form set A; files after form set B. The output is the IPs in A or B but **not both** (the XOR). - -```bash -iprange before.txt --diff after.txt -``` - -**Exit code**: 0 if the sets are identical (no output), 1 if there are differences. - -Use `--quiet` to suppress the output and only check the exit code: - -```bash -iprange old.txt --diff new.txt --quiet -if [ $? -eq 1 ]; then - echo "sets differ" -fi -``` - -Aliases: `--diff`, `--diff-next` - -## Reduce Prefixes (`--ipset-reduce`) - -Merge all inputs, then reduce the number of distinct CIDR prefixes. The matched IP set remains identical — only the CIDR representation changes. - -```bash -iprange --ipset-reduce 20 blocklist.txt -``` - -See [Optimizing ipsets for iptables](ipset-reduce.md) for a detailed explanation with examples. 
- -| Option | Default | Meaning | -|--------|---------|---------| -| `--ipset-reduce PERCENT` | 20 | Allow this % increase in entries | -| `--ipset-reduce-entries ENTRIES` | 16384 | Minimum acceptable entry count | - -Aliases: `--reduce-factor`, `--reduce-entries` - -## Compare (CSV modes) - -These modes produce CSV output for analyzing overlap between IP sets. Add `--header` to include column names. - -### Compare all (`--compare`) - -Compare every file with every other file: - -```bash -iprange --compare --header file1.txt file2.txt file3.txt -``` - -Output columns: `name1,name2,entries1,entries2,ips1,ips2,combined_ips,common_ips` - -### Compare first (`--compare-first`) - -Compare the first file against each subsequent file: - -```bash -iprange --compare-first --header reference.txt other1.txt other2.txt -``` - -Output columns: `name,entries,unique_ips,common_ips` - -### Compare next (`--compare-next`) - -**Positional**: compare files before the option against files after: - -```bash -iprange file1.txt file2.txt --compare-next file3.txt file4.txt -``` - -Output columns: same as `--compare` - -## Count Unique (CSV modes) - -### Count merged (`--count-unique` / `-C`) - -Merge all inputs and print one CSV line with totals: - -```bash -iprange -C --header blocklist.txt -``` - -Output columns: `entries,unique_ips` - -### Count per file (`--count-unique-all`) - -Print one CSV line per input file (no merging): - -```bash -iprange --count-unique-all --header file1.txt file2.txt -``` - -Output columns: `name,entries,unique_ips` diff --git a/wiki/reduce.md b/wiki/reduce.md new file mode 100644 index 0000000..71af6d0 --- /dev/null +++ b/wiki/reduce.md @@ -0,0 +1,76 @@ +# Reduce Prefixes + +Merge all inputs, then reduce the number of distinct CIDR prefixes while keeping the matched IP set identical. 
+ +**Aliases**: `--ipset-reduce PERCENT`, `--reduce-factor PERCENT`, `--ipset-reduce-entries ENTRIES`, `--reduce-entries ENTRIES` + +## Why + +netfilter/iptables `hash:net` ipsets perform one lookup per distinct prefix length. An ipset using 18 prefixes does 18 lookups per packet. Reducing to 3 prefixes cuts that to 3 lookups — for the exact same set of matched IPs. + +The number of entries does not affect ipset lookup performance. + +## Parameters + +| Option | Default | Purpose | +|--------|---------|---------| +| `--ipset-reduce PERCENT` | 20 | Allow this % increase in entries | +| `--ipset-reduce-entries ENTRIES` | 16384 | Minimum absolute entry cap | + +Maximum acceptable entries = `max(current * (1 + PERCENT/100), ENTRIES)`. + +The algorithm iteratively eliminates the prefix with the smallest cost (fewest new entries added), merging it into the next available prefix. Use `-v` to see each step. + +## Example + +A file with entries spanning many prefix lengths: + +``` +# input +10.0.0.0/24 +10.0.1.0/25 +10.0.1.128/26 +10.0.1.192/27 +10.0.1.224/28 +10.0.1.240/29 +10.0.1.248/30 +10.0.1.252/31 +10.0.1.254 +10.0.1.255 +10.0.2.0/24 +10.0.3.0/25 +``` + +Before reduction — 3 prefixes: + +``` +$ iprange -v input.txt 2>&1 | grep -E 'prefix|totals' + - prefix /23 counts 1 entries + - prefix /24 counts 1 entries + - prefix /25 counts 1 entries +totals: 12 lines read, 1 distinct IP ranges found, 3 CIDR prefixes, 3 CIDRs printed, 896 unique IPs +``` + +After `--ipset-reduce 50` — 1 prefix: + +``` +$ iprange -v --ipset-reduce 50 input.txt 2>&1 | grep -E 'prefix|totals|Eliminated' +Eliminated 2 out of 3 prefixes (1 remain in the final set). + - prefix /25 counts 7 entries +totals: 12 lines read, 1 distinct IP ranges found, 1 CIDR prefixes, 7 CIDRs printed, 896 unique IPs +``` + +Same 896 unique IPs, expressed as 7 /25 entries instead of a mix of /23, /24, and /25. The kernel now does 1 lookup instead of 3. 
+ +## Lossless round-trip + +The reduction is lossless — piping the reduced output back through `iprange` produces the original optimized set: + +``` +$ iprange --ipset-reduce 100 input.txt | iprange -C +1,896 +$ iprange input.txt | iprange -C +1,896 +``` + +See [Optimizing ipsets for iptables](ipset-reduce.md) for an extended tutorial with real-world country and blocklist examples. From 6452175c8b3edcfb2533747df47d20defc36a6d4 Mon Sep 17 00:00:00 2001 From: Costa Tsaousis Date: Sat, 4 Apr 2026 20:05:40 +0300 Subject: [PATCH 9/9] Update CMakeLists.txt to build with current source files - Add missing ipset_dns.c/h and ipset6_dns.c/h - Link pthreads properly via Threads::Threads - Generate config.h from cmake/config.h.in template - Check for __uint128_t at configure time - Enable COMPARE_WITH_COMMON and compiler warnings - Fix target name to 'iprange' (was 'iprange_git') --- CMakeLists.txt | 86 ++++++++++++++++++++++++++++++++--------------- cmake/config.h.in | 13 +++++++ 2 files changed, 72 insertions(+), 27 deletions(-) create mode 100644 cmake/config.h.in diff --git a/CMakeLists.txt b/CMakeLists.txt index 2caed4c..4b4719b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,33 +1,65 @@ -cmake_minimum_required(VERSION 3.3) +cmake_minimum_required(VERSION 3.10) project(iprange C) -find_package (Threads) +find_package(Threads REQUIRED) -set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wall -Wextra -Wformat-signedness -Werror=format-security") +include(CheckTypeSize) +check_type_size("__uint128_t" UINT128_SIZE LANGUAGE C) +if(NOT HAVE_UINT128_SIZE) + message(FATAL_ERROR "Compiler does not support __uint128_t, required for IPv6") +endif() -set(SOURCE_FILES - src/iprange.c src/iprange.h src/iprange6.h src/iprange6_main.c - src/ipset.c src/ipset.h - src/ipset6.c src/ipset6.h - src/ipset6_binary.c src/ipset6_binary.h - src/ipset6_combine.c src/ipset6_common.c src/ipset6_copy.c - src/ipset6_diff.c src/ipset6_exclude.c - src/ipset6_load.c src/ipset6_load.h - src/ipset6_merge.c 
src/ipset6_optimize.c - src/ipset6_print.c src/ipset6_print.h - src/ipset_binary.c src/ipset_binary.h - src/ipset_combine.c src/ipset_combine.h - src/ipset_common.c src/ipset_common.h - src/ipset_copy.c src/ipset_copy.h - src/ipset_diff.c src/ipset_diff.h - src/ipset_exclude.c src/ipset_exclude.h - src/ipset_load.c src/ipset_load.h - src/ipset_merge.c src/ipset_merge.h - src/ipset_optimize.c src/ipset_optimize.h - src/ipset_print.c src/ipset_print.h - src/ipset_reduce.c src/ipset_reduce.h) +# Generate config.h with the defines the source expects +include(CheckIncludeFile) +check_include_file(inttypes.h HAVE_INTTYPES_H) +check_include_file(stdint.h HAVE_STDINT_H) -include_directories(AFTER ${CMAKE_CURRENT_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/src) -add_definitions("-DHAVE_CONFIG_H") +configure_file( + ${CMAKE_CURRENT_SOURCE_DIR}/cmake/config.h.in + ${CMAKE_CURRENT_BINARY_DIR}/config.h +) -add_executable(iprange_git ${SOURCE_FILES}) +set(SOURCES + src/iprange.c + src/iprange6_main.c + src/ipset.c + src/ipset6.c + src/ipset6_binary.c + src/ipset6_combine.c + src/ipset6_common.c + src/ipset6_copy.c + src/ipset6_diff.c + src/ipset6_dns.c + src/ipset6_exclude.c + src/ipset6_load.c + src/ipset6_merge.c + src/ipset6_optimize.c + src/ipset6_print.c + src/ipset_binary.c + src/ipset_combine.c + src/ipset_common.c + src/ipset_copy.c + src/ipset_diff.c + src/ipset_dns.c + src/ipset_exclude.c + src/ipset_load.c + src/ipset_merge.c + src/ipset_optimize.c + src/ipset_print.c + src/ipset_reduce.c +) + +add_executable(iprange ${SOURCES}) +target_include_directories(iprange PRIVATE + ${CMAKE_CURRENT_BINARY_DIR} + ${CMAKE_CURRENT_SOURCE_DIR}/src +) +target_compile_definitions(iprange PRIVATE + HAVE_CONFIG_H + COMPARE_WITH_COMMON=1 + VERSION="${PROJECT_VERSION}" +) +target_compile_options(iprange PRIVATE + -Wall -Wextra -Wformat-signedness -Werror=format-security +) +target_link_libraries(iprange Threads::Threads) diff --git a/cmake/config.h.in b/cmake/config.h.in new file mode 
100644 index 0000000..a4f396e --- /dev/null +++ b/cmake/config.h.in @@ -0,0 +1,13 @@ +/* config.h — generated by CMake from cmake/config.h.in */ + +#cmakedefine HAVE_INTTYPES_H 1 +#cmakedefine HAVE_STDINT_H 1 + +/* gcc branch optimization */ +#ifdef __GNUC__ +#define likely(x) __builtin_expect(!!(x), 1) +#define unlikely(x) __builtin_expect(!!(x), 0) +#else +#define likely(x) (x) +#define unlikely(x) (x) +#endif