a pastebin project

Stuff

  1. #!/usr/bin/env python
  2. # -*- coding: UTF-8 -*-
  3. #Author: Roy L Zuo (roylzuo at gmail dot com)
  4. #Last Change: Wed Nov 26 12:37:24 2008 EST
  5. #Description: 根据yingjiesheng搜索关键字结果,群发简历,并保存已投
  6. #             职位具体信息至指定文件夹
  7. import urllib2, re, os, shelve, time
  8.  
  9. searches = [['linux', 'python'], ['unix','python'],['linux','金融'],
  10.             ['unix','金融'],['linux','finance'], ['unix','finance'],
  11.             ]
  12.  
  13. savepath = '%s/workspace/career/buster' %os.environ['HOME']
  14.  
  15. def getLatestJobs(keywords):
  16.     '''搜索最新工作列表,与以投列表对照,并返回未投工作之链接'''
  17.     #TODO: compare with saved pages
  18.     url0 = "http://s.yingjiesheng.com/result.jsp?keyword=%s&period=3&sort=&jobtype=1" %'+'.join(keywords)
  19.     url = url0+"&start=0"
  20.     page = urllib2.urlopen(url).read()
  21.     match = re.search("共找到(.*)条记录",page)
  22.     if not match:      return
  23.     results = re.findall('<h3 class="title"><a href="([^"]*)" target="_blank">.*?</a></h3>',page)
  24.     for i in range(int(match.group(1))/10):
  25.         nurl=url0+"&start=%d0" %(i+1)
  26.         npage = urllib2.urlopen(nurl).read()
  27.         results.extend(re.findall('<h3 class="title"><a href="([^"]*)" target="_blank">.*?</a></h3>',npage))
  28.     return results
  29.  
  30. def getEmailAddress(url, savepath):
  31.     '''查找页面,看是否有email地址,返回email地址'''
  32.     page = urllib2.urlopen(url).read()
  33.     match = re.search("(\w+(?:[-+.]\w+)*@\w+(?:[-.]\w+)*\.\w+(?:[-.]\w+)*)",page)
  34.     if not match:       return
  35.     #保存
  36.     savedir = '%s/%s' %(savepath,time.strftime("%y-%m-%d"))
  37.     if not os.path.exists(savedir):      os.mkdir(savedir)
  38.     file = open("%s/%s" %(savedir,url.split("/")[-1]),'w')
  39.     file.write(page)
  40.     file.close()
  41.     return match.group(1)
  42.  
  43. if __name__=='__main__':
  44.     import sys
  45.     #import socket
  46.     #sys.path.append("%s/workspace/python/lib" %os.environ['HOME'])
  47.     #from threadmanager import WorkerManager
  48.     #socket.setdefaulttimeout(10)
  49.  
  50.     joblist=[]
  51.     #wm = WorkerManager(30)
  52.     for item in searches:
  53.         #wm.add_job(getLatestJobs, item)
  54.     #wm.wait_for_complete()
  55.     #joblist = wm.get_result()
  56.         links = getLatestJobs(item)
  57.         if links is not None:
  58.             joblist += getLatestJobs(item)
  59.     joblist=list(set(joblist))
  60.  
  61.     submitted = shelve.open("%s/submitted" %savepath)
  62.     emails=[]
  63.     for url in joblist:
  64.         if submitted.has_key(url):      continue
  65.         #print url
  66.         e = getEmailAddress(url, savepath)
  67.         #print e
  68.         if e:
  69.             emails.append(e)
  70.             submitted[url]=e
  71.     emails=list(set(emails))
  72.     submitted.close()
  73.  
  74.     sender="Le Zuo (Roy) <lzuo@graduate.hku.hk>"
  75.     attachment="/home/roylez/workspace/career/doc/resume.pdf"
  76.     subject="应聘"
  77.     mutt = "mutt -s'%s' -e'set from=\"%s\"' -a'%s' %s <$HOME/doc/letter.txt"
  78.     subemails = shelve.open("%s/emails" %savepath)
  79.     for e in emails:
  80.         #使用mutt发送简历,内容为文件模板内容,自动添加附件
  81.         if subemails.has_key(e):      continue
  82.         print "Submitting to %s ..." %e
  83.         os.system(mutt %(subject,sender,attachment,e))
  84.         subemails[e]=''

advertising

Create a Paste

Please enter your new post below (or upload a file instead):





Please note that information posted here will not expire by default. If you want it to expire, please set the expiry time above. If it is set to expire, web search engines will not be allowed to index it prior to it expiring. Items that are not marked to expire will be indexable by search engines. Be careful with your passwords.

worth-right
fantasy-obligation