Merge pull request #42 from TechCiel/master
Deep deduplication for text CIDR list
This commit is contained in:
commit
4eb80dad60
|
@ -41,11 +41,11 @@ jobs:
|
||||||
run: |
|
run: |
|
||||||
curl -LR -o dist/ipip_net.txt "https://raw.githubusercontent.com/17mon/china_ip_list/master/china_ip_list.txt"
|
curl -LR -o dist/ipip_net.txt "https://raw.githubusercontent.com/17mon/china_ip_list/master/china_ip_list.txt"
|
||||||
curl -LR -o dist/chunzhen.txt "https://raw.githubusercontent.com/metowolf/iplist/master/data/country/CN.txt"
|
curl -LR -o dist/chunzhen.txt "https://raw.githubusercontent.com/metowolf/iplist/master/data/country/CN.txt"
|
||||||
|
echo >> dist/chunzhen.txt # ensure newline at ending
|
||||||
|
|
||||||
- name: Merge and IP lists and remove duplicates
|
- name: Merge IP lists and perform deep deduplication
|
||||||
run: |
|
run: |
|
||||||
awk 'FNR==1{print ""}{print}' dist/*.txt > dist/merge.txt
|
cat dist/*.txt | dist/dedup > dist/CN-ip-cidr.txt
|
||||||
awk '!seen[$0]++' dist/merge.txt > dist/CN-ip-cidr.txt
|
|
||||||
|
|
||||||
- name: Generate GeoIP2 database
|
- name: Generate GeoIP2 database
|
||||||
run: |
|
run: |
|
||||||
|
|
1
build.sh
1
build.sh
|
@ -1,2 +1,3 @@
|
||||||
go build -o dist/ipip2mmdb main.go ip2cidr.go
|
go build -o dist/ipip2mmdb main.go ip2cidr.go
|
||||||
go build -o dist/verify_ip verify/verify_ip.go
|
go build -o dist/verify_ip verify/verify_ip.go
|
||||||
|
gcc -o dist/dedup dedup.c
|
||||||
|
|
|
@ -0,0 +1,65 @@
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
/*
 * MASK(x): 32-bit IPv4 netmask whose top x bits are set.
 *   MASK(0)  == 0x00000000
 *   MASK(24) == 0xffffff00
 *   MASK(32) == 0xffffffff
 * The argument is fully parenthesized so expressions such as MASK(a + b)
 * expand correctly, and x >= 32 is clamped to an all-ones mask so the shift
 * count can never reach or exceed the width of unsigned (undefined behavior).
 * Note: x is evaluated more than once; do not pass expressions with side
 * effects.
 */
#define MASK(x) ((x) == 0u ? 0u : ((x) >= 32u ? ~0u : ~((1u << (32u - (x))) - 1u)))

/* Address bits of the trie path currently being walked by print(). */
unsigned current;

/*
 * Binary trie over IPv4 address bits, most significant bit first.
 * A node at depth d stands for the /d prefix spelled by the path from root.
 */
struct Trie {
    char flag;             /* non-zero: the whole prefix of this node is covered */
    struct Trie *child[2]; /* child[b] extends the prefix with bit b; NULL if absent */
} *root = NULL;

/*
 * Collapse sibling prefixes: every node whose two children are both fully
 * covered becomes covered itself (two adjacent /n+1 networks form one /n).
 *
 * Both subtrees are always visited. Returning early when one child is missing,
 * or short-circuiting on the first child's result, would leave mergeable pairs
 * deeper in the other subtree untouched.
 *
 * p may be NULL (an absent subtree covers nothing).
 * Returns 1 when the prefix rooted at p is fully covered, 0 otherwise.
 */
char merge(struct Trie *p) {
    if (p == NULL) {
        return 0;
    }
    /* Already covered: descendants are redundant because print() never
     * descends below a flagged node. */
    if (p->flag) {
        return 1;
    }
    /* Evaluate both sides unconditionally so each subtree gets merged. */
    char left = merge(p->child[0]);
    char right = merge(p->child[1]);
    p->flag = (char)(left && right);
    return p->flag;
}
|
||||||
|
|
||||||
|
void print(struct Trie *p, unsigned depth) {
|
||||||
|
// print whole subnet
|
||||||
|
if(p->flag) {
|
||||||
|
unsigned ip = current & MASK(depth);
|
||||||
|
printf("%u.%u.%u.%u/%u\n", ip>>24&0xff, ip>>16&0xff, ip>>8&0xff, ip&0xff, depth);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
// dig deeper
|
||||||
|
if(p->child[0]) {
|
||||||
|
current &= ~(1<<(31-depth));
|
||||||
|
print(p->child[0], depth+1);
|
||||||
|
}
|
||||||
|
if(p->child[1]) {
|
||||||
|
current |= 1<<(31-depth);
|
||||||
|
print(p->child[1], depth+1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
int main() {
|
||||||
|
unsigned ip1, ip2, ip3, ip4, prefix_len;
|
||||||
|
while(scanf("%u.%u.%u.%u/%u", &ip1, &ip2, &ip3, &ip4, &prefix_len)==5) {
|
||||||
|
// convert to binary
|
||||||
|
unsigned ip = (ip1<<24) | (ip2<<16) | (ip3<<8) | (ip4);
|
||||||
|
unsigned mask = MASK(prefix_len);
|
||||||
|
// build trie
|
||||||
|
struct Trie **p = &root;
|
||||||
|
while(mask) {
|
||||||
|
// walk
|
||||||
|
if((*p)==NULL) (*p) = calloc(1, sizeof(struct Trie));
|
||||||
|
p = &((*p)->child[ip>>31]);
|
||||||
|
// next bit
|
||||||
|
ip <<= 1;
|
||||||
|
mask <<= 1;
|
||||||
|
}
|
||||||
|
// mark node
|
||||||
|
if((*p)==NULL) (*p) = calloc(1, sizeof(struct Trie));
|
||||||
|
(*p)->flag = 1;
|
||||||
|
}
|
||||||
|
if(root) {
|
||||||
|
// merge trie
|
||||||
|
merge(root);
|
||||||
|
// print trie
|
||||||
|
print(root, 0);
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
Loading…
Reference in New Issue