Merge pull request #42 from TechCiel/master

Deep deduplication for text CIDR list
This commit is contained in:
Jason Zhang 2024-05-13 15:37:10 +08:00 committed by GitHub
commit 4eb80dad60
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 69 additions and 3 deletions

View File

@ -41,11 +41,11 @@ jobs:
run: | run: |
curl -LR -o dist/ipip_net.txt "https://raw.githubusercontent.com/17mon/china_ip_list/master/china_ip_list.txt" curl -LR -o dist/ipip_net.txt "https://raw.githubusercontent.com/17mon/china_ip_list/master/china_ip_list.txt"
curl -LR -o dist/chunzhen.txt "https://raw.githubusercontent.com/metowolf/iplist/master/data/country/CN.txt" curl -LR -o dist/chunzhen.txt "https://raw.githubusercontent.com/metowolf/iplist/master/data/country/CN.txt"
echo >> dist/chunzhen.txt # ensure newline at ending
- name: Merge and IP lists and remove duplicates - name: Merge and IP lists and deep deduplication
run: | run: |
awk 'FNR==1{print ""}{print}' dist/*.txt > dist/merge.txt cat dist/*.txt | dist/dedup > dist/CN-ip-cidr.txt
awk '!seen[$0]++' dist/merge.txt > dist/CN-ip-cidr.txt
- name: Generate GeoIP2 database - name: Generate GeoIP2 database
run: | run: |

View File

@ -1,2 +1,3 @@
go build -o dist/ipip2mmdb main.go ip2cidr.go go build -o dist/ipip2mmdb main.go ip2cidr.go
go build -o dist/verify_ip verify/verify_ip.go go build -o dist/verify_ip verify/verify_ip.go
gcc -o dist/dedup dedup.c

65
dedup.c Normal file
View File

@ -0,0 +1,65 @@
#include <stdio.h>
#include <stdlib.h>
/*
 * MASK(x): netmask with the top x bits set, for 0 <= x <= 32
 * (e.g. MASK(24) == 0xFFFFFF00).
 *
 * x == 0 is special-cased because the general formula would shift by 32,
 * which is undefined for a 32-bit operand.  Values of x above 32 are not
 * supported: the subtraction wraps and the shift count becomes invalid.
 * The argument is fully parenthesized so expressions such as MASK(a + b)
 * expand correctly; note that x is evaluated twice, so avoid side effects.
 * NOTE(review): assumes `unsigned` is 32 bits wide.
 */
#define MASK(x) ((x) ? ~((1u << (32u - (x))) - 1u) : 0u)
// Address bits of the trie path that print() is currently walking.
// print() clears or sets bit (31 - depth) before descending into child 0 or
// child 1, and masks the value with MASK(depth) when it emits a subnet, so any
// stale lower bits left over from a previous branch never reach the output.
// As a file-scope object it starts out as 0.
unsigned current;
/*
 * Binary trie over address bits, most significant bit first.
 *
 *   flag      non-zero when the whole subnet represented by this node is
 *             covered (either listed in the input or derived by merge()).
 *   child[b]  subtree for the next address bit being b, or NULL if absent.
 *
 * `root` represents the empty prefix and stays NULL until the first insert.
 */
struct Trie {
    char flag;
    struct Trie *child[2];
} *root = NULL;

/*
 * Coalesce sibling subnets bottom-up: a node becomes covered when both of
 * its halves are covered.
 *
 * Every existing child is always visited, even when the other child is
 * missing or turned out not to be covered.  Returning early in those cases
 * would leave whole subtrees unmerged and the resulting list non-minimal.
 * A node that is already flagged is not descended into, because print()
 * stops at flagged nodes and never looks at their descendants.
 *
 * Returns 1 if the subnet rooted at p is fully covered, 0 otherwise.
 */
char merge(struct Trie *p) {
    if (p->flag) {
        return 1;
    }

    char low_covered = 0;
    char high_covered = 0;

    if (p->child[0]) {
        low_covered = merge(p->child[0]);
    }
    if (p->child[1]) {
        high_covered = merge(p->child[1]);
    }

    if (low_covered && high_covered) {
        p->flag = 1;
    }
    return p->flag;
}
/*
 * Write every covered subnet below p to stdout, one "a.b.c.d/len" per line.
 *
 *   p      node to start from (must not be NULL)
 *   depth  number of address bits already fixed on the path to p, i.e. the
 *          prefix length that p represents
 *
 * The path bits are tracked in the global `current`.  Child 0 is visited
 * before child 1, so subnets come out in ascending address order.  Descent
 * stops at the first flagged node: anything beneath it is already contained
 * in the subnet just printed.
 */
void print(struct Trie *p, unsigned depth) {
    if (p->flag) {
        // Bits below `depth` may be stale from an earlier branch; mask them off.
        unsigned ip = current & MASK(depth);
        printf("%u.%u.%u.%u/%u\n",
               (ip >> 24) & 0xff, (ip >> 16) & 0xff, (ip >> 8) & 0xff, ip & 0xff,
               depth);
        return;
    }

    // No address bit left to branch on; also keeps the shift count below valid.
    if (depth >= 32u) {
        return;
    }

    // Unsigned constant: shifting a signed 1 into bit 31 is undefined behaviour.
    unsigned bit = 1u << (31u - depth);

    if (p->child[0]) {
        current &= ~bit;
        print(p->child[0], depth + 1);
    }
    if (p->child[1]) {
        current |= bit;
        print(p->child[1], depth + 1);
    }
}
/* Allocate a zeroed trie node; terminates the program if memory runs out. */
static struct Trie *new_node(void) {
    struct Trie *node = calloc(1, sizeof *node);
    if (node == NULL) {
        perror("dedup: calloc");
        exit(EXIT_FAILURE);
    }
    return node;
}

/*
 * Mark the subnet ip/prefix_len as covered in the global trie.
 * Only the top prefix_len bits of ip are used, so host bits are ignored.
 * Requires prefix_len <= 32.
 */
static void insert(unsigned ip, unsigned prefix_len) {
    struct Trie **p = &root;
    for (unsigned bit = 0; bit < prefix_len; bit++) {
        if (*p == NULL) {
            *p = new_node();
        }
        p = &(*p)->child[(ip >> (31u - bit)) & 1u];
    }
    if (*p == NULL) {
        *p = new_node();
    }
    (*p)->flag = 1;
}

/*
 * Read "a.b.c.d/len" entries from stdin, merge them into a minimal set of
 * subnets and write the result to stdout.
 *
 * Entries that do not parse, have an octet above 255 or a prefix length
 * above 32 are skipped up to the end of their line instead of ending the
 * whole run, so one bad line cannot silently drop the rest of the input.
 * The number of skipped fragments is reported on stderr.
 *
 * Returns 0 on success; exits with EXIT_FAILURE if allocation fails.
 */
int main(void) {
    unsigned ip1, ip2, ip3, ip4, prefix_len;
    unsigned long skipped = 0;

    for (;;) {
        int fields = scanf("%u.%u.%u.%u/%u", &ip1, &ip2, &ip3, &ip4, &prefix_len);
        if (fields == EOF) {
            break;
        }
        // The range checks are only reached when all five fields were assigned.
        if (fields != 5 || ip1 > 255u || ip2 > 255u || ip3 > 255u || ip4 > 255u ||
            prefix_len > 32u) {
            // Discard the remainder of the offending line and resynchronize.
            int c;
            while ((c = getchar()) != EOF && c != '\n') {
            }
            skipped++;
            continue;
        }
        // convert to binary and add to the trie
        insert((ip1 << 24) | (ip2 << 16) | (ip3 << 8) | ip4, prefix_len);
    }

    if (skipped != 0) {
        fprintf(stderr, "dedup: skipped %lu malformed or out-of-range input fragment(s)\n",
                skipped);
    }

    if (root) {
        // merge trie
        merge(root);
        // print trie
        print(root, 0);
    }
    return 0;
}