Merge pull request #42 from TechCiel/master
Deep deduplication for text CIDR list
This commit is contained in:
commit
4eb80dad60
|
@ -41,11 +41,11 @@ jobs:
|
|||
run: |
|
||||
curl -LR -o dist/ipip_net.txt "https://raw.githubusercontent.com/17mon/china_ip_list/master/china_ip_list.txt"
|
||||
curl -LR -o dist/chunzhen.txt "https://raw.githubusercontent.com/metowolf/iplist/master/data/country/CN.txt"
|
||||
echo >> dist/chunzhen.txt # ensure newline at ending
|
||||
|
||||
- name: Merge and IP lists and remove duplicates
|
||||
- name: Merge and IP lists and deep deduplication
|
||||
run: |
|
||||
awk 'FNR==1{print ""}{print}' dist/*.txt > dist/merge.txt
|
||||
awk '!seen[$0]++' dist/merge.txt > dist/CN-ip-cidr.txt
|
||||
cat dist/*.txt | dist/dedup > dist/CN-ip-cidr.txt
|
||||
|
||||
- name: Generate GeoIP2 database
|
||||
run: |
|
||||
|
|
1
build.sh
1
build.sh
|
@ -1,2 +1,3 @@
|
|||
go build -o dist/ipip2mmdb main.go ip2cidr.go
|
||||
go build -o dist/verify_ip verify/verify_ip.go
|
||||
gcc -o dist/dedup dedup.c
|
||||
|
|
|
@ -0,0 +1,65 @@
|
|||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
// Network mask with the top x bits set, for a prefix length x.
// x == 0 yields 0 and any x >= 32 yields all ones, so the shift count used
// in the last branch is always in 1..31 and never undefined.
// The argument is evaluated more than once: do not pass expressions with
// side effects.
#define MASK(x) ((x) == 0 ? 0u : (x) >= 32u ? ~0u : ~((1u << (32u - (x))) - 1u))
|
||||
|
||||
// Address bits of the trie path print() is currently on: print() clears or
// sets bit (31 - depth) before descending into child[0] or child[1], and
// masks this value to the node's depth when it emits a subnet.
unsigned current;
|
||||
// Node of a binary trie keyed by IPv4 address bits, most significant bit
// first. A node at depth d stands for the /d subnet spelled by the path
// from the root.
struct Trie {
    // non-zero when the whole subnet at this node is covered
    char flag;
    // child[b] continues the path with next address bit b; NULL if absent
    struct Trie *child[2];
} *root=NULL;

// Collapse the subtree rooted at p: a node becomes flagged when both of its
// halves are fully covered, so two sibling /n+1 subnets turn into one /n.
// Returns non-zero when the subnet at p is fully covered, zero otherwise
// (including when p is NULL).
char merge(struct Trie *p) {
    if(!p) return 0;
    // this node is marked: everything below it is already covered
    if(p->flag) return 1;
    // Always visit BOTH children, even when one is missing or turns out not
    // to be covered: the other subtree may still contain sibling pairs that
    // can be merged deeper down.
    char low = p->child[0] ? merge(p->child[0]) : 0;
    char high = p->child[1] ? merge(p->child[1]) : 0;
    // covered only when both halves are covered
    p->flag = (low && high);
    return p->flag;
}
|
||||
|
||||
void print(struct Trie *p, unsigned depth) {
|
||||
// print whole subnet
|
||||
if(p->flag) {
|
||||
unsigned ip = current & MASK(depth);
|
||||
printf("%u.%u.%u.%u/%u\n", ip>>24&0xff, ip>>16&0xff, ip>>8&0xff, ip&0xff, depth);
|
||||
return;
|
||||
}
|
||||
// dig deeper
|
||||
if(p->child[0]) {
|
||||
current &= ~(1<<(31-depth));
|
||||
print(p->child[0], depth+1);
|
||||
}
|
||||
if(p->child[1]) {
|
||||
current |= 1<<(31-depth);
|
||||
print(p->child[1], depth+1);
|
||||
}
|
||||
}
|
||||
|
||||
int main() {
|
||||
unsigned ip1, ip2, ip3, ip4, prefix_len;
|
||||
while(scanf("%u.%u.%u.%u/%u", &ip1, &ip2, &ip3, &ip4, &prefix_len)==5) {
|
||||
// convert to binary
|
||||
unsigned ip = (ip1<<24) | (ip2<<16) | (ip3<<8) | (ip4);
|
||||
unsigned mask = MASK(prefix_len);
|
||||
// build trie
|
||||
struct Trie **p = &root;
|
||||
while(mask) {
|
||||
// walk
|
||||
if((*p)==NULL) (*p) = calloc(1, sizeof(struct Trie));
|
||||
p = &((*p)->child[ip>>31]);
|
||||
// next bit
|
||||
ip <<= 1;
|
||||
mask <<= 1;
|
||||
}
|
||||
// mark node
|
||||
if((*p)==NULL) (*p) = calloc(1, sizeof(struct Trie));
|
||||
(*p)->flag = 1;
|
||||
}
|
||||
if(root) {
|
||||
// merge trie
|
||||
merge(root);
|
||||
// print trie
|
||||
print(root, 0);
|
||||
}
|
||||
return 0;
|
||||
}
|
Loading…
Reference in New Issue