#!/usr/bin/python3

# Author: Paul Wise <pabs@debian.org>
# License: GNU GPLv3 or later

import os
import re
import ldap
from pprint import pprint

ldap_server = 'ldaps://db.debian.org'
ldap_hosts_search = 'ou=hosts,dc=debian,dc=org'
ldap_search = '(sponsor=*)'
# 'architecture' is requested but not consumed below; it is kept so that the
# query sent to the server is unchanged.
ldap_attributes = ['sponsor', 'hostname', 'purpose', 'architecture', 'description']

# Ordered (pattern, replacement) rewrites applied to every raw purpose string.
# Order matters: e.g. the domain suffix is stripped before the ganeti and
# syncproxy host names are collapsed into generic labels.
_purpose_rewrites = [
	# [[target|label]] wiki-style link -> label
	(re.compile(r'\[\[[-*]?([^| ]+)\|([^\]]+)]]'), r'\2'),
	# [[target]] wiki-style link -> target
	(re.compile(r'\[\[[-*]?([^| ]+)]]'), r'\1'),
	# "<something>.debian.org - text" -> text
	(re.compile(r'^.+\.debian\.org - (.*)'), r'\1'),
	# drop .debian/.debconf .org/.net domain suffixes
	(re.compile(r'\.deb(ian|conf)\.(org|net)'), ''),
	# "mirror (details)" -> "mirror"
	(re.compile(r'(mirror) \([^)]+\)'), r'\1'),
	(re.compile(r'ganeti[0-9]*\.[^ ]*'), 'vm host'),
	(re.compile(r'ganeti/kvm host'), 'vm host'),
	(re.compile(r'syncproxy[0-9]*\.[^ ]*'), 'mirror sync'),
	(re.compile(r'sync proxy'), 'mirror sync'),
	(re.compile(r' for Debian services'), ''),
]
# Applied only to hosts under .debian.net (see _normalise_purpose).
_unofficial_re = re.compile(r'(buildd|porterbox)')
# Trailing parenthesised qualifier of a sponsor value; the closing
# parenthesis is optional in the pattern.
_sponsor_type_re = re.compile(r' *\([^)]+\)? *$')


def _normalise_purpose(host, raw_purpose):
	"""Return raw_purpose rewritten into a short generic label.

	host is the full host name (domain included); purposes of hosts ending
	in '.debian.net' get buildd/porterbox prefixed with 'unofficial'.
	The result may be an empty string.
	"""
	purpose = raw_purpose
	for pattern, replacement in _purpose_rewrites:
		purpose = pattern.sub(replacement, purpose)
	if host.endswith('.debian.net'):
		purpose = _unofficial_re.sub(r'unofficial \1', purpose)
	return purpose


def _parse_sponsor(sponsor):
	"""Return (name, is_hosting) for one decoded sponsor value.

	A trailing parenthesised qualifier is removed from the name, and the
	sponsor counts as hosting only if that qualifier contains 'hosting'.
	A sponsor without a qualifier is always counted as hosting.
	"""
	match = _sponsor_type_re.search(sponsor)
	if match:
		sponsor_type = match.group(0)
		sponsor = _sponsor_type_re.sub('', sponsor)
		is_hosting = 'hosting' in sponsor_type
	else:
		is_hosting = True
	# Normalise the name regardless of whether a qualifier was present, so
	# the same sponsor is not recorded under two different keys.
	sponsor = sponsor.replace('&amp;', '&').replace(' (UBC)', '')
	return sponsor, is_hosting


conn = ldap.initialize(ldap_server)
try:
	results = conn.search_s(ldap_hosts_search, ldap.SCOPE_SUBTREE, ldap_search, ldap_attributes)
finally:
	# Release the connection even if the search fails.
	conn.unbind_s()

# Maps sponsor name -> set of purpose strings of the hosts it sponsors.
hosting_sponsors = {}
for _dn, e in results:
	#pprint(e)
	host = e['hostname'][0].decode('UTF-8')
	purposes = set()
	for raw_purpose in e.get('purpose', []):
		purpose = _normalise_purpose(host, raw_purpose.decode('UTF-8'))
		if purpose:
			purposes.add(purpose)
	if not purposes:
		# Fall back to the free-form description; a host may have neither
		# attribute, in which case the set simply stays empty.
		purposes = {description.decode('UTF-8') for description in e.get('description', [])}
	# The search filter is (sponsor=*), so matched entries carry this attribute;
	# the default only guards against an entry returned without it.
	for raw_sponsor in e.get('sponsor', []):
		sponsor, is_hosting = _parse_sponsor(raw_sponsor.decode('UTF-8'))
		if is_hosting:
			hosting_sponsors.setdefault(sponsor, set()).update(purposes)

#pprint(hosting_sponsors)

def show(name=None, url='', purposes=None):
	"""Print one report line of the form ``<name> <url> => <purposes>``.

	name: display name of the sponsor.
	url: URL of the sponsor, or '' when there is none.
	purposes: iterable of purpose strings, printed sorted and joined with
		', '. None (the default) is treated as no purposes.
	"""
	# A None sentinel replaces the former mutable ``set()`` default, which
	# would have been shared between calls.
	if purposes is None:
		purposes = ()
	print(name, url, '=>', ', '.join(sorted(purposes)))

# Emit one line per hosting sponsor, in sorted key order. Keys of the form
# [[target|label]] are split into a label and an https URL built from the
# target; any other key is printed as-is with an empty URL.
for sponsor_key in sorted(hosting_sponsors.keys()):
	sponsored_purposes = hosting_sponsors[sponsor_key]
	linked = re.search(r'^\[\[([^|]+)\|([^\]]+)]]$', sponsor_key)
	if linked is None:
		show(name=sponsor_key, url='', purposes=sponsored_purposes)
		continue
	target, label = linked.groups()
	show(name=label, url='https://{}'.format(target), purposes=sponsored_purposes)

