#!/usr/bin/env python
# -*- coding: UTF-8 -*-
#Author: Roy L Zuo (roylzuo at gmail dot com)
#Last Change: Wed Nov 26 12:37:24 2008 EST
#Description: 根据yingjiesheng搜索关键字结果,群发简历,并保存已投
# 职位具体信息至指定文件夹
import urllib2, re, os, shelve, time
# Keyword combinations to search for; each inner list is one query whose
# terms are joined with '+' when the search URL is built.
searches = [
    ['linux', 'python'],
    ['unix', 'python'],
    ['linux', '金融'],
    ['unix', '金融'],
    ['linux', 'finance'],
    ['unix', 'finance'],
]

# Directory under $HOME that holds the archived job pages and the shelve
# databases used by the script.
savepath = os.environ['HOME'] + '/workspace/career/buster'
def getLatestJobs(keywords):
    '''Search yingjiesheng for *keywords* and return the links of every hit.

    keywords -- sequence of search terms; they are joined with '+' to
                form the ``keyword`` query parameter.

    Returns a list of the href values found in the result-title headings
    of all result pages, or None when the first page does not contain the
    "共找到 N 条记录" record-count banner.

    Any error raised by urllib2.urlopen propagates to the caller.
    '''
    #TODO: compare with saved pages
    link_pattern = '<h3 class="title"><a href="([^"]*)" target="_blank">.*?</a></h3>'
    url0 = "http://s.yingjiesheng.com/result.jsp?keyword=%s&period=3&sort=&jobtype=1" % '+'.join(keywords)

    page = urllib2.urlopen(url0 + "&start=0").read()
    # Capture digits only, so stray text on the same line can never reach int().
    match = re.search(r"共找到\s*(\d+)\s*条记录", page)
    if not match:
        return None
    total = int(match.group(1))

    results = re.findall(link_pattern, page)
    # The offsets assume 10 results per page.  Stopping strictly below
    # `total` avoids requesting an empty trailing page when the hit count
    # is an exact multiple of 10.
    for start in range(10, total, 10):
        npage = urllib2.urlopen(url0 + "&start=%d" % start).read()
        results.extend(re.findall(link_pattern, npage))
    return results
def getEmailAddress(url, savepath):
    '''Fetch *url* and return the first e-mail address found in the page.

    url      -- address of the job page to download.
    savepath -- base directory for archived pages.

    When an address is found, the raw page is archived as
    <savepath>/<yy-mm-dd>/<last path segment of url> before returning.

    Returns the matched address string, or None when the page contains no
    address (nothing is written to disk in that case).

    Errors from urllib2.urlopen and from creating or writing the archive
    file propagate to the caller.
    '''
    page = urllib2.urlopen(url).read()
    match = re.search(r"(\w+(?:[-+.]\w+)*@\w+(?:[-.]\w+)*\.\w+(?:[-.]\w+)*)", page)
    if not match:
        return None

    # Archive the page in a per-day directory.
    savedir = '%s/%s' % (savepath, time.strftime("%y-%m-%d"))
    try:
        # makedirs also creates savepath itself on the very first run.
        os.makedirs(savedir)
    except OSError:
        # Already existing is fine; anything else is a real error.
        if not os.path.isdir(savedir):
            raise

    # Strip a trailing slash so the file name is never empty.
    filename = url.rstrip("/").split("/")[-1]
    with open("%s/%s" % (savedir, filename), 'wb') as saved:
        saved.write(page)
    return match.group(1)
if __name__=='__main__':
    import subprocess
    import sys
    # TODO: run the searches in parallel.  The original sketch appended
    # $HOME/workspace/python/lib to sys.path, used WorkerManager(30) from
    # the threadmanager module there, and set socket.setdefaulttimeout(10).

    # 1. Collect the job links of every configured search, de-duplicated.
    joblist = []
    for item in searches:
        links = getLatestJobs(item)
        if links is not None:
            # Reuse the result instead of querying the site a second time.
            joblist += links
    joblist = list(set(joblist))

    # 2. Visit every job page not seen before and harvest its e-mail address.
    #    `submitted` maps job URL -> address found on that page.
    submitted = shelve.open("%s/submitted" % savepath)
    emails = []
    try:
        for url in joblist:
            if url in submitted:
                continue
            e = getEmailAddress(url, savepath)
            if e:
                emails.append(e)
                submitted[url] = e
    finally:
        # Always close so the entries recorded so far reach the disk.
        submitted.close()
    emails = list(set(emails))

    # 3. Mail the resume to every address that has not been mailed before.
    sender = "Le Zuo (Roy) <lzuo@graduate.hku.hk>"
    attachment = "/home/roylez/workspace/career/doc/resume.pdf"
    subject = "应聘"
    letter = "%s/doc/letter.txt" % os.environ['HOME']
    # `subemails` holds the addresses that have already been mailed.
    subemails = shelve.open("%s/emails" % savepath)
    try:
        for e in emails:
            if e in subemails:
                continue
            print("Submitting to %s ..." % e)
            # Send with mutt: the message body is the letter template fed
            # on stdin and the resume is attached.  An argument list keeps
            # the scraped address away from any shell.
            with open(letter) as body:
                status = subprocess.call(
                    ["mutt", "-s", subject,
                     "-e", 'set from="%s"' % sender,
                     "-a", attachment, e],
                    stdin=body)
            if status == 0:
                subemails[e] = ''
            else:
                # Do not record the address, since nothing was sent to it.
                print("mutt exited with status %d; %s not marked as sent" % (status, e))
    finally:
        subemails.close()