最新英文单词表的域名未注册英文单词表 - 主机头

#!/usr/bin/python
#coding=utf-8
import urllib
import urllib2
import re
import threading
import Queue
url = 'https://who.is/whois/name_search/'
headers = {'Host':'who.is',
'User-Agent':'Mozilla/5.0 (X11; Ubuntu; Linux i686; rv:15.0) Gecko/20100101 Firefox/15.0'}
pattern_str = '<header>Available</header>[\w\W^tl]*?tld[\W]+([\w]+)</div>'
pattern = re.compile(pattern_str)
missed_word = Queue.Queue()
def query_domain(name):
    """Look up *name* on who.is and return the domains reported as available.

    The page at ``url + name`` is fetched with the module-level ``headers``
    and scanned with ``pattern``; every match contributes one
    ``"<name>.<tld>"`` string.

    Args:
        name: the bare word to search for (no TLD).

    Returns:
        A list of ``"name.tld"`` strings (possibly empty), or ``None`` when
        the request failed.  On failure *name* is put on ``missed_word`` so
        it can be retried later.
    """
    # Send the browser-like headers defined at module level; without an
    # explicit Request object they would never reach the server.
    request = urllib2.Request(url + name, headers=headers)
    try:
        op = urllib2.urlopen(request)
        try:
            result = op.read()
        finally:
            # Close the response even if read() fails part-way through.
            op.close()
    except Exception:
        # Best effort: any fetch error just schedules the word for a retry.
        # (Unlike a bare ``except:``, this lets KeyboardInterrupt and
        # SystemExit propagate.)
        missed_word.put(name)
        return None
    return ['{0}.{1}'.format(name, rst.group(1))
            for rst in pattern.finditer(result)]
def gen_words():
    """Load the system word list and bucket purely alphabetic words by length.

    Reads ``/usr/share/dict/words``, keeps only the whitespace-separated
    tokens made up entirely of ASCII letters, and puts each one on the queue
    whose index equals the word's length.

    Returns:
        A list of ``Queue.Queue`` objects where element ``i`` holds the words
        of length ``i``.  The list always has at least 25 elements and grows
        when a longer word is encountered.
    """
    # The anchored pattern rejects tokens containing apostrophes, digits,
    # hyphens, etc.  (The closing ``$'`` of this literal had been mangled in
    # the source; it is restored here.)
    letters_only = re.compile(r'^[a-zA-Z]*$')

    # Use a context manager so the file handle is released promptly.
    with open('/usr/share/dict/words', 'r') as word_file:
        wdlst = word_file.read().split()

    lst = [Queue.Queue() for _ in range(25)]
    for wd in wdlst:
        if not letters_only.match(wd):
            continue
        # Grow the bucket list instead of raising IndexError on words of
        # 25 letters or more.
        while len(wd) >= len(lst):
            lst.append(Queue.Queue())
        lst[len(wd)].put(wd)
    return lst
# Serialises printing so result lines from different worker threads do not
# interleave.
lock_print = threading.Lock()


def search_ava_domain(word_queue):
    """Worker loop: drain *word_queue*, printing available domains per word.

    Each word taken from the queue is passed to ``query_domain``; when that
    returns a non-empty list, the entries are printed on one line separated
    by spaces.  The function returns once the queue has no more items.

    Args:
        word_queue: a ``Queue.Queue`` of words to look up.  ``task_done()``
            is called once for every item taken, so a ``join()`` on the
            queue can complete.
    """
    while True:
        # A separate empty() check followed by a blocking get() races with
        # the other workers: another thread can take the last item in
        # between, leaving this thread blocked forever.  get_nowait() makes
        # the check and the removal a single step.
        try:
            word = word_queue.get_nowait()
        except Queue.Empty:
            break
        try:
            rst = query_domain(word)
            if rst:
                with lock_print:
                    # Single parenthesised argument: prints the same text
                    # under the print statement and the print function.
                    print(' '.join(rst))
        finally:
            # Always account for the item so word_queue.join() cannot hang.
            word_queue.task_done()
def thread_search(word_queue, thread_num=35):
    """Process *word_queue* with a pool of worker threads, then retry misses.

    Starts ``thread_num`` threads running ``search_ava_domain`` on
    *word_queue* and blocks until every queued item has been marked done.
    If ``missed_word`` holds any entries afterwards, it is processed the
    same way by a recursive call using the default thread count.

    Args:
        word_queue: queue of words to look up.
        thread_num: number of worker threads to start for this queue.
    """
    workers = [
        threading.Thread(None, target=search_ava_domain, args=(word_queue,))
        for _ in xrange(thread_num)
    ]
    for worker in workers:
        worker.start()

    # Wait until task_done() has been called for every item in the queue.
    word_queue.join()

    # Words whose lookup failed were collected in missed_word; run them again.
    if not missed_word.empty():
        thread_search(missed_word)
def main():
    """Search for available domains, one word-length bucket at a time.

    Builds the per-length word queues with ``gen_words`` and runs
    ``thread_search`` on every bucket from index 5 upwards, in order;
    buckets 0 through 4 are skipped.
    """
    buckets = gen_words()
    for index, bucket in enumerate(buckets):
        if index < 5:
            continue
        thread_search(bucket)


if __name__ == '__main__':
    main()

复制代码