commit c104b13dfd187bf45e050a84ec1ea2b90585222a Author: Антон Date: Mon Feb 2 11:02:32 2026 +0300 initial build diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..20a06be --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +__pycache__ +.env +venv diff --git a/README.md b/README.md new file mode 100644 index 0000000..d02a531 --- /dev/null +++ b/README.md @@ -0,0 +1,269 @@ +# RIPE AS CIDR & FQDN IP Collector + +This project collects CIDR prefixes for specified Autonomous Systems (AS) from the RIPE NCC API and resolves IP addresses for specified FQDNs. It accumulates these addresses over time, maintaining a history of discovered prefixes. It also provides a FastAPI-based HTTP interface to retrieve the collected data. + +## 1. Preparation and Installation + +### Prerequisites +- Python 3.8+ +- `pip` and `venv` + +### Installation Steps +1. **Clone the repository** (or copy the files) to your desired location, e.g., `/opt/ripe_collector`. + ```bash + mkdir -p /opt/ripe_collector + cd /opt/ripe_collector + # Copy files: cidr_collector.py, api_server.py, requirements.txt, config.json + ``` + +2. **Create a Virtual Environment**: + ```bash + python3 -m venv venv + ``` + +3. **Install Dependencies**: + ```bash + source venv/bin/activate + pip install -r requirements.txt + deactivate + ``` + +4. **Initial Configuration**: + Edit `config.json` to set your initial ASNs and FQDNs. + ```json + { + "asns": [62041], + "fqdns": ["google.com"] + } + ``` + +--- + +## 2. Running the Collector (Periodic Task) + +The collector script `cidr_collector.py` is designed to run once per day to fetch updates. It opens `config.json`, `data.json` and `fqdn_data.json` by relative path, so it must be started from the project directory. + +### Manual Run +```bash +cd /opt/ripe_collector && /opt/ripe_collector/venv/bin/python3 /opt/ripe_collector/cidr_collector.py run +``` + +### Setup Cron Job (Recommended) +To run daily at 02:00 AM: + +1. Open crontab: + ```bash + crontab -e + ``` +2. 
Add the line (the `cd` is required: cron starts jobs in the user's home directory, while the collector looks for `config.json` in the current directory): + ```cron + 0 2 * * * cd /opt/ripe_collector && /opt/ripe_collector/venv/bin/python3 /opt/ripe_collector/cidr_collector.py run >> /var/log/ripe_collector.log 2>&1 + ``` + +--- + +## 3. Application Setup: Systemd (Ubuntu, Debian) + +This section describes how to run the **API Server** (`api_server.py`) as a system service. + +### Create Service File +Create `/etc/systemd/system/ripe-api.service`: + +```ini +[Unit] +Description=RIPE CIDR Collector API +After=network.target + +[Service] +User=root +# Change User=root to a generic user if desired; ensure they have write access to /opt/ripe_collector (config.json, data.json, fqdn_data.json) +WorkingDirectory=/opt/ripe_collector +ExecStart=/opt/ripe_collector/venv/bin/uvicorn api_server:app --host 0.0.0.0 --port 8000 +Restart=always + +[Install] +WantedBy=multi-user.target +``` + +### Enable and Start +```bash +# Reload systemd +sudo systemctl daemon-reload + +# Enable service to start on boot +sudo systemctl enable ripe-api + +# Start service immediately +sudo systemctl start ripe-api + +# Check status +sudo systemctl status ripe-api +``` + +--- + +## 4. Application Setup: RC-Script (Alpine Linux) + +For Alpine Linux using OpenRC. + +### Create Init Script +Create `/etc/init.d/ripe-api`: + +```sh +#!/sbin/openrc-run + +name="ripe-api" +description="RIPE CIDR Collector API" +command="/opt/ripe_collector/venv/bin/uvicorn" +# --host and --port and module:app passed as arguments +command_args="api_server:app --host 0.0.0.0 --port 8000" +command_background="yes" +pidfile="/run/${RC_SVCNAME}.pid" +directory="/opt/ripe_collector" + +depend() { + need net +} +``` + +### Make Executable +```bash +chmod +x /etc/init.d/ripe-api +``` + +### Enable and Start +```bash +# Add to default runlevel +rc-update add ripe-api default + +# Start service +service ripe-api start + +# Check status +service ripe-api status +``` + +--- + +## 5. API Usage Documentation + +The API runs by default on port `8000`. It allows retrieving the collected data in a flat JSON list. 
+ +### Base URL +`http://<server-address>:8000` + +### Endpoint: Get Addresses +**GET** `/addresses` + +Retrieves the list of collected IP addresses/CIDRs. + +| Parameter | Type | Required | Default | Description | +| :--- | :--- | :--- | :--- | :--- | +| `type` | string | No | `all` | Filter by source type. Options: `cidr` (ASNs only), `fqdn` (Domains only), `all` (Both). | + +#### Example 1: Get All Addresses (Default) + +**Request:** +```bash +curl -X GET "http://localhost:8000/addresses" +``` + +**Response (JSON):** +```json +[ + "142.250.1.1", + "149.154.160.0/22", + "149.154.160.0/23", + "2001:4860:4860::8888", + "91.108.4.0/22" +] +``` + +#### Example 2: Get Only CIDRs (from ASNs) + +**Request:** +```bash +curl -X GET "http://localhost:8000/addresses?type=cidr" +``` + +**Response (JSON):** +```json +[ + "149.154.160.0/22", + "149.154.160.0/23", + "91.108.4.0/22" +] +``` + +#### Example 3: Get Only Resolved IPs (from FQDNs) + +**Request:** +```bash +curl -X GET "http://localhost:8000/addresses?type=fqdn" +``` + +**Response (JSON):** +```json +[ + "142.250.1.1", + "2001:4860:4860::8888" +] +``` + +### Endpoint: Manage Schedule +**GET** `/schedule` +Returns the current cron schedules. + +**POST** `/schedule` +Updates the schedule for a specific collector type. +Body: +```json +{ + "type": "asn", + "cron": "*/15 * * * *" +} +``` +*Note: `type` can be `asn` or `fqdn`.* + +--- + +## 6. Advanced CLI Usage + +The collector script supports running modes independently: + +```bash +# Run both (Default) +python3 cidr_collector.py run + +# Run only ASN collection +python3 cidr_collector.py run --mode asn + +# Run only FQDN collection +python3 cidr_collector.py run --mode fqdn +``` + +--- + +## 7. Internal Logic & Architecture + +### Collector Logic +When the collector runs (whether manually or via schedule): +1. **Instantiation**: Creates a new instance of `CIDRCollector` or `FQDNCollector`. 
This forces a fresh read of `config.json`, ensuring any added ASNs/FQDNs are immediately processed. +2. **Fetching**: + * **ASN**: Queries RIPE NCC API (`stat.ripe.net`). + * **FQDN**: Uses Python's `socket.getaddrinfo` to resolve A and AAAA records. +3. **Comparison**: Reads existing `data.json`/`fqdn_data.json`. It compares the fetched set with the stored set. +4. **Accumulation**: It effectively performs a Union operation (Old U New). + * **If new items found**: The list is updated, sorting is applied, and `last_updated` timestamp is refreshed for that specific resource. + * **If no new items**: The file is untouched. +5. **Persistence**: Checks are performed to ensure data is only written to disk if changes actually occurred. + +### Scheduler Logic +The `api_server.py` uses `APScheduler` (BackgroundScheduler). + +1. **Startup**: When the server starts (`uvicorn`), `start_scheduler` is called. It loads the `schedule` block from `config.json` and creates two independent jobs (`asn_job`, `fqdn_job`). +2. **Runtime Updates (POST /schedule)**: + * The server validates the new cron expression. + * It updates `config.json` so the change survives restarts. + * It calls `scheduler.add_job(..., replace_existing=True)`. This hot-swaps the trigger for the running job. +3. **Concurrency**: If a scheduled job is already running when a new schedule is posted, the running job completes normally. The new schedule applies to the *next* calculated run time. 
diff --git a/api_server.py b/api_server.py
new file mode 100644
index 0000000..da3d62c
--- /dev/null
+++ b/api_server.py
@@ -0,0 +1,155 @@
+"""HTTP API for the RIPE CIDR / FQDN collector.
+
+Serves the accumulated prefixes and resolved IPs from the collector's JSON
+files and runs both collectors on cron schedules via APScheduler.
+"""
+from fastapi import FastAPI, Query, Body, HTTPException
+from enum import Enum
+import json
+import os
+from typing import List, Dict
+from apscheduler.schedulers.background import BackgroundScheduler
+from apscheduler.triggers.cron import CronTrigger
+from cidr_collector import CIDRCollector, FQDNCollector, load_full_config, save_full_config
+
+app = FastAPI(title="RIPE CIDR/FQDN API")
+
+class AddressType(str, Enum):
+    """Values accepted by the ``type`` query parameter of GET /addresses."""
+    cidr = "cidr"
+    fqdn = "fqdn"
+    all_types = "all"
+
+DATA_FILE = "data.json"
+FQDN_DATA_FILE = "fqdn_data.json"
+CONFIG_FILE = "config.json"
+
+scheduler = BackgroundScheduler()
+
+# Wrapper functions for scheduler
+def run_asn_job():
+    """Scheduled job: collect announced prefixes for every configured ASN."""
+    print("Running scheduled ASN collection...")
+    # Re-instantiate so config.json is re-read on every run
+    CIDRCollector().run_collection()
+
+def run_fqdn_job():
+    """Scheduled job: resolve every configured FQDN."""
+    print("Running scheduled FQDN collection...")
+    # Re-instantiate so config.json is re-read on every run
+    FQDNCollector().run_collection()
+
+def load_json(filename):
+    """Return the parsed JSON object stored in ``filename``.
+
+    Returns an empty dict when the file is missing, unreadable, not valid
+    JSON, or does not hold a JSON object, so the endpoints degrade to an
+    empty result instead of failing.
+    """
+    if not os.path.exists(filename):
+        return {}
+    try:
+        with open(filename, 'r') as f:
+            data = json.load(f)
+    except (OSError, ValueError):
+        return {}
+    return data if isinstance(data, dict) else {}
+
+def _collect_values(filename, key) -> List[str]:
+    """Return the de-duplicated union of every entry's ``key`` list in ``filename``."""
+    values = set()
+    for entry in load_json(filename).values():
+        values.update(entry.get(key, []))
+    return list(values)
+
+def get_cidrs() -> List[str]:
+    """Return every stored prefix across all ASNs, de-duplicated (unordered)."""
+    return _collect_values(DATA_FILE, "prefixes")
+
+def get_fqdn_ips() -> List[str]:
+    """Return every stored IP across all FQDNs, de-duplicated (unordered)."""
+    return _collect_values(FQDN_DATA_FILE, "ips")
+
+@app.on_event("startup")
+def start_scheduler():
+    """Register both collection jobs from the ``schedule`` config block and start the scheduler.
+
+    Missing entries fall back to 02:00 daily (ASN) and 03:00 daily (FQDN).
+    """
+    config = load_full_config()
+    schedule_config = config.get("schedule", {})
+
+    asn_cron = schedule_config.get("asn", "0 2 * * *")
+    fqdn_cron = schedule_config.get("fqdn", "0 3 * * *")
+
+    # Add jobs
+    scheduler.add_job(run_asn_job, CronTrigger.from_crontab(asn_cron), id="asn_job", replace_existing=True)
+    scheduler.add_job(run_fqdn_job, CronTrigger.from_crontab(fqdn_cron), id="fqdn_job", replace_existing=True)
+
+    scheduler.start()
+    print(f"Scheduler started. ASN: {asn_cron}, FQDN: {fqdn_cron}")
+
+@app.on_event("shutdown")
+def shutdown_scheduler():
+    """Stop the background scheduler when the application shuts down."""
+    scheduler.shutdown()
+
+@app.get("/addresses", response_model=List[str])
+def get_addresses(type: AddressType = Query(AddressType.all_types, description="Filter by address type")):
+    """Return the sorted, de-duplicated list of stored CIDRs and/or resolved IPs."""
+    results = set()
+
+    if type in [AddressType.cidr, AddressType.all_types]:
+        results.update(get_cidrs())
+
+    if type in [AddressType.fqdn, AddressType.all_types]:
+        results.update(get_fqdn_ips())
+
+    return sorted(results)
+
+@app.get("/schedule")
+def get_schedule():
+    """Return the ``schedule`` block of config.json (empty dict when absent)."""
+    config = load_full_config()
+    return config.get("schedule", {})
+
+@app.post("/schedule")
+def update_schedule(schedule_update: Dict[str, str] = Body(..., example={"type": "asn", "cron": "*/10 * * * *"})):
+    """Validate a new cron expression, persist it to config.json and re-schedule the job.
+
+    Responds with HTTP 400 when ``type`` is not 'asn'/'fqdn', when ``cron`` is
+    missing, or when the cron expression cannot be parsed.
+    """
+    job_type = schedule_update.get("type")
+    cron_str = schedule_update.get("cron")
+
+    if job_type not in ["asn", "fqdn"]:
+        raise HTTPException(status_code=400, detail="Invalid type. Must be 'asn' or 'fqdn'.")
+
+    if not cron_str:
+        raise HTTPException(status_code=400, detail="Cron string required.")
+
+    # Validate cron string by attempting to create trigger
+    try:
+        trigger = CronTrigger.from_crontab(cron_str)
+    except Exception as e:
+        raise HTTPException(status_code=400, detail=f"Invalid cron string: {e}")
+
+    # Update config file
+    config = load_full_config()
+    config.setdefault("schedule", {})[job_type] = cron_str
+
+    # Persist through the collector's atomic writer so a job that re-reads
+    # config.json at the same moment never sees a half-written file
+    save_full_config(config)
+
+    # Update running job
+    job_id = f"{job_type}_job"
+    func = run_asn_job if job_type == "asn" else run_fqdn_job
+    scheduler.add_job(func, trigger, id=job_id, replace_existing=True)
+
+    return {"message": "Schedule updated", "type": job_type, "cron": cron_str}
+
+if __name__ == "__main__":
+    import uvicorn
+    uvicorn.run(app, host="0.0.0.0", port=8000)
diff --git a/cidr_collector.py b/cidr_collector.py
new file mode 100644
index 0000000..6de5bae
--- /dev/null
+++ b/cidr_collector.py
@@ -0,0 +1,326 @@
+"""Collector for RIPE-announced AS prefixes and FQDN-resolved IP addresses.
+
+Results are accumulated (never pruned) in data.json / fqdn_data.json; the list
+of ASNs and FQDNs to process lives in config.json.
+"""
+import json
+import requests
+import datetime
+import os
+import argparse
+import sys
+import socket
+
+CONFIG_FILE = "config.json"
+DATA_FILE = "data.json"
+FQDN_DATA_FILE = "fqdn_data.json"
+BASE_URL = "https://stat.ripe.net/data/announced-prefixes/data.json"
+
+def _read_json(path, default):
+    """Return the JSON content of ``path``, or ``default`` when missing or invalid."""
+    if not os.path.exists(path):
+        return default
+    try:
+        with open(path, 'r') as f:
+            return json.load(f)
+    except json.JSONDecodeError:
+        return default
+
+def _write_json_atomic(path, payload):
+    """Write ``payload`` as JSON to ``path`` without exposing a partial file.
+
+    The data is written to a sibling temporary file and moved into place with
+    os.replace(), so a concurrent reader (e.g. the API server) sees either the
+    old or the new content, never a truncated one.
+    """
+    tmp_path = f"{path}.tmp"
+    try:
+        with open(tmp_path, 'w') as f:
+            json.dump(payload, f, indent=4)
+        os.replace(tmp_path, path)
+    finally:
+        # Only still present when the dump or the replace failed
+        if os.path.exists(tmp_path):
+            os.remove(tmp_path)
+
+def load_full_config():
+    """Load config.json; fall back to an empty ASN/FQDN configuration."""
+    return _read_json(CONFIG_FILE, {"asns": [], "fqdns": []})
+
+def save_full_config(config):
+    """Persist the whole configuration dict to config.json."""
+    _write_json_atomic(CONFIG_FILE, config)
+
+class CIDRCollector:
+    """Accumulates the prefixes announced by each configured ASN."""
+
+    def __init__(self):
+        self.config = load_full_config()
+        self.asns = self.config.get("asns", [])
+
+    def save_config(self):
+        """Write the current ASN list back to config.json."""
+        self.config["asns"] = self.asns
+        save_full_config(self.config)
+
+    def add_asn(self, asn):
+        """Add ``asn`` to the configuration unless it is already listed."""
+        if asn not in self.asns:
+            self.asns.append(asn)
+            self.save_config()
+            print(f"ASN {asn} added.")
+        else:
+            print(f"ASN {asn} already in list.")
+
+    def remove_asn(self, asn):
+        """Remove ``asn`` from the configuration if present."""
+        if asn in self.asns:
+            self.asns.remove(asn)
+            self.save_config()
+            print(f"ASN {asn} removed.")
+        else:
+            print(f"ASN {asn} not found in list.")
+
+    def list_asns(self):
+        """Print the configured ASNs."""
+        print("Current ASNs:", self.asns)
+
+    def fetch_prefixes(self, asn):
+        """Return the prefixes RIPEstat reports for ``asn``, or None on any error."""
+        params = {'resource': f'AS{asn}'}
+        try:
+            response = requests.get(BASE_URL, params=params, timeout=10)
+            response.raise_for_status()
+            data = response.json()
+
+            prefixes = []
+            if 'data' in data and 'prefixes' in data['data']:
+                for item in data['data']['prefixes']:
+                    if 'prefix' in item:
+                        prefixes.append(item['prefix'])
+            return prefixes
+        except Exception as e:
+            print(f"Error fetching data for AS{asn}: {e}")
+            return None
+
+    def load_data(self):
+        """Return the stored per-ASN data, or {} when data.json is missing/invalid."""
+        return _read_json(DATA_FILE, {})
+
+    def save_data(self, data):
+        """Persist the per-ASN data to data.json."""
+        _write_json_atomic(DATA_FILE, data)
+
+    def run_collection(self):
+        """Fetch prefixes for every ASN and merge new ones into data.json.
+
+        ASNs whose fetch failed are skipped; the file is rewritten only when
+        at least one ASN gained prefixes or was seen for the first time.
+        """
+        current_data = self.load_data()
+        updated = False
+
+        current_time = datetime.datetime.now().isoformat()
+
+        print("Starting ASN CIDR collection...")
+        for asn in self.asns:
+            str_asn = str(asn)
+            print(f"Processing AS{asn}...")
+            fetched_prefixes = self.fetch_prefixes(asn)
+
+            if fetched_prefixes is None:
+                continue
+
+            fetched_set = set(fetched_prefixes)
+
+            # Initialize if ASN not present
+            if str_asn not in current_data:
+                current_data[str_asn] = {
+                    "last_updated": current_time,
+                    "prefixes": sorted(fetched_set)
+                }
+                print(f" - New ASN. Added {len(fetched_set)} prefixes.")
+                updated = True
+            else:
+                existing_prefixes = set(current_data[str_asn].get("prefixes", []))
+
+                # Check for new prefixes
+                new_prefixes = fetched_set - existing_prefixes
+
+                if new_prefixes:
+                    # Accumulate: Union of existing and new
+                    updated_set = existing_prefixes.union(fetched_set)
+                    current_data[str_asn]["prefixes"] = sorted(updated_set)
+                    current_data[str_asn]["last_updated"] = current_time
+                    print(f" - Updates found. Added {len(new_prefixes)} new prefixes.")
+                    updated = True
+                else:
+                    print(" - No new prefixes found.")
+
+        if updated:
+            self.save_data(current_data)
+            print("CIDR Data saved to data.json")
+        else:
+            print("No CIDR changes to save.")
+
+class FQDNCollector:
+    """Accumulates the IP addresses each configured FQDN resolves to."""
+
+    def __init__(self):
+        self.config = load_full_config()
+        self.fqdns = self.config.get("fqdns", [])
+
+    def save_config(self):
+        """Write the current FQDN list back to config.json."""
+        self.config["fqdns"] = self.fqdns
+        save_full_config(self.config)
+
+    def add_fqdn(self, fqdn):
+        """Add ``fqdn`` to the configuration unless it is already listed."""
+        if fqdn not in self.fqdns:
+            self.fqdns.append(fqdn)
+            self.save_config()
+            print(f"FQDN {fqdn} added.")
+        else:
+            print(f"FQDN {fqdn} already in list.")
+
+    def remove_fqdn(self, fqdn):
+        """Remove ``fqdn`` from the configuration if present."""
+        if fqdn in self.fqdns:
+            self.fqdns.remove(fqdn)
+            self.save_config()
+            print(f"FQDN {fqdn} removed.")
+        else:
+            print(f"FQDN {fqdn} not found in list.")
+
+    def list_fqdns(self):
+        """Print the configured FQDNs."""
+        print("Current FQDNs:", self.fqdns)
+
+    def resolve_fqdn(self, fqdn):
+        """Return the distinct IPv4/IPv6 addresses ``fqdn`` resolves to ([] on failure)."""
+        try:
+            # No family given, so both IPv4 (AF_INET) and IPv6 (AF_INET6) results come back
+            results = socket.getaddrinfo(fqdn, None)
+        except (socket.gaierror, UnicodeError) as e:
+            # UnicodeError: getaddrinfo rejects names it cannot IDNA-encode
+            # (e.g. an empty or over-long label); treat it like a failed lookup
+            # so one malformed entry does not abort the whole run
+            print(f"Error resolving {fqdn}: {e}")
+            return []
+        # result[4] is the sockaddr. For IP protocols, index 0 is the IP address string
+        return list({result[4][0] for result in results})
+
+    def load_data(self):
+        """Return the stored per-FQDN data, or {} when fqdn_data.json is missing/invalid."""
+        return _read_json(FQDN_DATA_FILE, {})
+
+    def save_data(self, data):
+        """Persist the per-FQDN data to fqdn_data.json."""
+        _write_json_atomic(FQDN_DATA_FILE, data)
+
+    def run_collection(self):
+        """Resolve every FQDN and merge newly seen IPs into fqdn_data.json.
+
+        FQDNs that resolve to nothing are skipped; the file is rewritten only
+        when at least one FQDN gained IPs or was seen for the first time.
+        """
+        current_data = self.load_data()
+        updated = False
+        current_time = datetime.datetime.now().isoformat()
+
+        print("Starting FQDN IP collection...")
+        for fqdn in self.fqdns:
+            print(f"Processing {fqdn}...")
+            resolved_ips = self.resolve_fqdn(fqdn)
+
+            if not resolved_ips:
+                print(f" - No IPs resolved for {fqdn}")
+                continue
+
+            fetched_set = set(resolved_ips)
+
+            if fqdn not in current_data:
+                current_data[fqdn] = {
+                    "last_updated": current_time,
+                    "ips": sorted(fetched_set)
+                }
+                print(f" - New FQDN. Added {len(fetched_set)} IPs.")
+                updated = True
+            else:
+                existing_ips = set(current_data[fqdn].get("ips", []))
+                new_ips = fetched_set - existing_ips
+
+                if new_ips:
+                    updated_set = existing_ips.union(fetched_set)
+                    current_data[fqdn]["ips"] = sorted(updated_set)
+                    current_data[fqdn]["last_updated"] = current_time
+                    print(f" - Updates found. Added {len(new_ips)} new IPs.")
+                    updated = True
+                else:
+                    print(" - No new IPs found.")
+
+        if updated:
+            self.save_data(current_data)
+            print(f"FQDN Data saved to {FQDN_DATA_FILE}")
+        else:
+            print("No FQDN changes to save.")
+
+
+def main():
+    """Command-line entry point: manage the ASN/FQDN lists or run a collection."""
+    parser = argparse.ArgumentParser(description="Collector for RIPE AS CIDRs and FQDN IPs")
+    subparsers = parser.add_subparsers(dest="command")
+
+    # Command: run (default)
+    parser_run = subparsers.add_parser("run", help="Run the collection process")
+    parser_run.add_argument("--mode", choices=["asn", "fqdn", "all"], default="all", help="Collection mode: asn, fqdn, or all (default)")
+
+    # ASN Commands
+    parser_add = subparsers.add_parser("add", help="Add an ASN")
+    parser_add.add_argument("asn", type=int, help="ASN to add")
+
+    parser_remove = subparsers.add_parser("remove", help="Remove an ASN")
+    parser_remove.add_argument("asn", type=int, help="ASN to remove")
+
+    # FQDN Commands
+    parser_add_fqdn = subparsers.add_parser("add-fqdn", help="Add an FQDN")
+    parser_add_fqdn.add_argument("fqdn", type=str, help="FQDN to add")
+
+    parser_remove_fqdn = subparsers.add_parser("remove-fqdn", help="Remove an FQDN")
+    parser_remove_fqdn.add_argument("fqdn", type=str, help="FQDN to remove")
+
+    # Command: list
+    subparsers.add_parser("list", help="List ASNs and FQDNs")
+
+    args = parser.parse_args()
+
+    asn_collector = CIDRCollector()
+    fqdn_collector = FQDNCollector()
+
+    if args.command == "add":
+        asn_collector.add_asn(args.asn)
+    elif args.command == "remove":
+        asn_collector.remove_asn(args.asn)
+    elif args.command == "add-fqdn":
+        fqdn_collector.add_fqdn(args.fqdn)
+    elif args.command == "remove-fqdn":
+        fqdn_collector.remove_fqdn(args.fqdn)
+    elif args.command == "list":
+        asn_collector.list_asns()
+        fqdn_collector.list_fqdns()
+    elif args.command == "run":
+        mode = args.mode
+        if mode in ["asn", "all"]:
+            asn_collector.run_collection()
+
+        if mode == "all":
+            print("-" * 20)
+
+        if mode in ["fqdn", "all"]:
+            fqdn_collector.run_collection()
+    else:
+        parser.print_help()
+
+if __name__ == "__main__":
+    main()
diff --git a/config.json b/config.json
new file mode 100644
index 0000000..272243a
--- /dev/null
+++ b/config.json
@@ -0,0 +1,19 @@
+{
+    "asns": [
+        62014,
+        62041,
+        59930,
+        44907,
+        211157,
+        11917
+    ],
+    "fqdns": [
+        "api.whatsapp.com",
+        "web.whatsapp.com",
+        "faq.whatsapp.com"
+    ],
+    "schedule": {
+        "asn": "*/15 * * * *",
+        "fqdn": "0 3 * * *"
+    }
+}
\ No newline at end of file
diff --git a/data.json b/data.json
new file mode 100644
index 0000000..750dd21
--- /dev/null
+++ b/data.json
@@ -0,0 +1,55 @@
+{
+    "62014": {
+        "last_updated": "2026-02-01T22:38:19.717894",
+        "prefixes": [
+            "149.154.168.0/22",
+            "2001:b28:f23f::/48",
+            "91.108.16.0/22",
+            "91.108.56.0/23"
+        ]
+    },
+    "62041": {
+        "last_updated": "2026-02-01T22:37:40.592937",
+        "prefixes": [
+            "149.154.160.0/22",
+            "149.154.160.0/23",
+            "149.154.162.0/23",
+            "149.154.164.0/22",
+            "149.154.164.0/23",
+            "149.154.166.0/23",
+            "2001:67c:4e8::/48",
+            "91.108.4.0/22",
+            "91.108.56.0/22",
+            "91.108.8.0/22",
+            "95.161.64.0/20"
+        ]
+    },
+    "59930": {
+        "last_updated": "2026-02-01T22:37:40.592937",
+        "prefixes": [
+            "149.154.172.0/22",
+            "2001:b28:f23d::/48",
+            "91.108.12.0/22"
+        ]
+    },
+    "44907": {
+        "last_updated": "2026-02-01T22:37:40.592937",
+        "prefixes": [
+            "2001:b28:f23c::/48",
+            "91.108.20.0/22",
+            "91.108.20.0/23"
+        ]
+    },
+    "211157": {
+        "last_updated": "2026-02-01T22:37:40.592937",
+        "prefixes": [
+            "185.76.151.0/24",
+            "2a0a:f280:203::/48",
+            "91.105.192.0/23"
+        ]
+    },
+    "11917": {
+        "last_updated": "2026-02-01T22:59:19.931110",
+        "prefixes": []
+    }
+}
\ No newline at end of file
diff --git a/fqdn_data.json b/fqdn_data.json
new file mode 100644
index 0000000..4585b00
--- /dev/null
+++ b/fqdn_data.json
@@ -0,0 +1,26 @@
+{
+    "google.com": {
+        "last_updated": "2026-02-02T08:53:20.370859",
+        "ips": [
+            "142.250.179.174"
+        ]
+    },
+    "api.whatsapp.com": {
+        "last_updated": "2026-02-02T09:08:23.907962",
+        "ips": [
+            "57.144.223.32"
+        ]
+    },
+    "web.whatsapp.com": {
+        "last_updated": "2026-02-02T09:08:23.907962",
+        "ips": [
+            "57.144.223.32"
+        ]
+    },
+    "faq.whatsapp.com": {
+        "last_updated": "2026-02-02T09:08:23.907962",
+        "ips": [
+            "57.144.223.32"
+        ]
+    }
+}
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..0b65ed8
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,4 @@
+requests
+fastapi
+uvicorn
+APScheduler