python抓取
# File extension (including the leading dot) -> MIME type for image uploads.
# Lookups are exact-match: no case folding is performed.
_CONTENT_TYPES = {
    '.png': 'image/png',
    '.gif': 'image/gif',
    '.jpg': 'image/jpeg',
    '.jpeg': 'image/jpeg',
    '.jpe': 'image/jpeg',
}


def _guess_content_type(ext):
    """Return the MIME type for the file extension ``ext``.

    ``ext`` must include the leading dot (e.g. ``'.png'``). Extensions that
    are not listed in ``_CONTENT_TYPES`` map to ``'application/octet-stream'``.
    """
    return _CONTENT_TYPES.get(ext, 'application/octet-stream')


# Request kinds passed as the ``method`` argument of ``_http_call``.
_HTTP_GET = 0
_HTTP_POST = 1
_HTTP_UPLOAD = 2


def _http_get(url, authorization=None, **kw):
    """Issue a GET request through ``_http_call`` and return its result.

    ``kw`` holds the request parameters; ``authorization`` is forwarded
    unchanged to ``_http_call``.
    """
    import logging
    # The original statement here was a bare format expression with no
    # effect; log the request on the module logger instead.
    logging.getLogger(__name__).info('GET %s', url)
    return _http_call(url, _HTTP_GET, authorization, **kw)


def _http_post(url, authorization=None, **kw):
    """Issue a POST request through ``_http_call`` and return its result.

    ``kw`` holds the request parameters; ``authorization`` is forwarded
    unchanged to ``_http_call``.
    """
    import logging
    logging.getLogger(__name__).info('POST %s', url)
    return _http_call(url, _HTTP_POST, authorization, **kw)
• • • • • • • • • • • • • • •
class APIClient(object):
    """API client that performs its calls synchronously.

    The constructor stores the application credentials, derives the OAuth2
    and API base URLs from ``domain`` and ``version``, and creates the
    ``get`` / ``post`` / ``upload`` ``HttpObject`` helpers.
    """

    def __init__(self, app_key, app_secret, redirect_uri=None,
                 response_type='code', domain='', version='2'):
        self.client_id = app_key
        self.client_secret = app_secret
        self.redirect_uri = redirect_uri
        # Stored as given; the methods of this class shown here do not read it.
        self.response_type = response_type
        self.auth_url = 'https://%s/oauth2/' % domain
        self.api_url = 'https://%s/%s/' % (domain, version)
        # No token until set_access_token() is called.
        self.access_token = None
        self.expires = 0.0
        self.get = HttpObject(self, _HTTP_GET)
        self.post = HttpObject(self, _HTTP_POST)
        self.upload = HttpObject(self, _HTTP_UPLOAD)

    def set_access_token(self, access_token, expires_in):
        """Store the access token (as ``str``) and its expiry (as ``float``)."""
        self.access_token = str(access_token)
        self.expires = float(expires_in)

    def get_authorize_url(self, redirect_uri=None, display='default'):
        """Return the authorize URL that the user should be redirected to.

        ``redirect_uri`` overrides the client's default redirect URI when
        given.

        Raises:
            APIError: code ``'21305'`` when neither the argument nor the
                client supplies a redirect URI.
        """
        redirect = redirect_uri or self.redirect_uri
        if not redirect:
            raise APIError('21305', 'Parameter absent: redirect_uri', 'OAuth2 request')
        query = _encode_params(
            client_id=self.client_id,
            response_type='code',
            display=display,
            redirect_uri=redirect,
        )
        return '%s%s?%s' % (self.auth_url, 'authorize', query)
def __str__(self):
    """Format this error as 'APIError: <error_code>: <error>, request: <request>'."""
    return 'APIError: %s: %s, request: %s' % (self.error_code, self.error, self.request)
通用的 JSON 对象:既可以像普通对象一样通过属性读写任意字段,也可以当作字典使用。
class JsonObject(dict):
    """A dict whose keys can also be read and written as attributes."""

    def __getattr__(self, attr):
        # Attribute lookup must fail with AttributeError, not KeyError:
        # getattr(obj, name, default) and hasattr() only treat AttributeError
        # as "missing", so a leaked KeyError would escape to the caller.
        try:
            return self[attr]
        except KeyError:
            raise AttributeError("'JsonObject' object has no attribute '%s'" % attr)

    def __setattr__(self, attr, value):
        self[attr] = value


def _encode_params(**kw):
    """Encode the keyword arguments as a URL query string.

    Unicode values are encoded as UTF-8 and every other value is converted
    with ``str()``; each value is then percent-quoted. Pairs are joined with
    ``'&'`` in the dict's iteration order.
    """
    def _quoted(value):
        raw = value.encode('utf-8') if isinstance(value, unicode) else str(value)
        return urllib.quote(raw)

    return '&'.join('%s=%s' % (key, _quoted(value)) for key, value in kw.iteritems())
• • •
• • • • • • • • • • •
def request_access_token(self, code, redirect_uri=None):
    """Exchange an authorization ``code`` for an access token.

    POSTs to ``<auth_url>access_token`` with the client credentials and
    returns the response object, e.g.
    ``{"access_token": "your-access-token", "expires_in": 12345678}``.
    The current Unix time is added to the returned ``expires_in`` before it
    is handed back.

    NOTE(review): reads ``self.redirect_uri``, ``self.auth_url``,
    ``self.client_id`` and ``self.client_secret`` -- presumably a method of
    ``APIClient``; confirm placement.

    Raises:
        APIError: code ``'21305'`` when neither the argument nor ``self``
            supplies a redirect URI.
    """
    redirect = redirect_uri or self.redirect_uri
    if not redirect:
        raise APIError('21305', 'Parameter absent: redirect_uri', 'OAuth2 request')
    r = _http_post(
        '%s%s' % (self.auth_url, 'access_token'),
        client_id=self.client_id,
        client_secret=self.client_secret,
        redirect_uri=redirect,
        code=code,
        grant_type='authorization_code',
    )
    r.expires_in += int(time.time())
    return r
• • • •
• • • • •
•
if authorization: req.add_header('Authorization', 'OAuth2 %s' % authorization) if boundary: req.add_header('Content-Type', 'multipart/form-data; boundary=%s' % boundary) resp = urllib2.urlopen(req) body = resp.read() r = json.loads(body, object_hook=_obj_hook) if hasattr(r, 'error_code'): raise APIError(r.error_code, getattr(r, 'error', ''), getattr(r, 'request', '')) return r
• else: • data.append('Content-Disposition: form-data; name="%s"\r\n' % k) • data.append(v.encode('utf-8') if isinstance(v, unicode) else v) • data.append('--%s--\r\n' % boundary) • return '\r\n'.join(data), boundary
• • • • • • • • • • • • • • •
def _http_upload(url, authorization=None, **kw):
    """Issue a multipart POST through ``_http_call`` and return its result."""
    import logging
    # The original statement here was a bare format expression with no
    # effect; log the request on the module logger instead.
    logging.getLogger(__name__).info('MULTIPART POST %s', url)
    return _http_call(url, _HTTP_UPLOAD, authorization, **kw)


def _http_call(url, method, authorization, **kw):
    """Send an HTTP request and, if there is no error, return the parsed JSON.

    Args:
        url: target URL, without a query string.
        method: one of ``_HTTP_GET``, ``_HTTP_POST`` or ``_HTTP_UPLOAD``.
        authorization: when truthy, sent as ``Authorization: OAuth2 <value>``.
        **kw: request parameters. They go into the query string for GET,
            and into the request body (multipart for uploads) otherwise.

    Returns:
        The response body decoded by ``json.loads`` with ``_obj_hook``.

    Raises:
        APIError: when the decoded response has an ``error_code`` attribute.
    """
    boundary = None
    if method == _HTTP_UPLOAD:
        params, boundary = _encode_multipart(**kw)
    else:
        params = _encode_params(**kw)

    if method == _HTTP_GET:
        http_url = '%s?%s' % (url, params)
        http_body = None
    else:
        http_url = url
        http_body = params

    req = urllib2.Request(http_url, data=http_body)
    if authorization:
        req.add_header('Authorization', 'OAuth2 %s' % authorization)
    if boundary:
        req.add_header('Content-Type', 'multipart/form-data; boundary=%s' % boundary)

    resp = urllib2.urlopen(req)
    try:
        body = resp.read()
    finally:
        # Release the connection even if reading the body fails.
        resp.close()

    r = json.loads(body, object_hook=_obj_hook)
    if hasattr(r, 'error_code'):
        raise APIError(r.error_code, getattr(r, 'error', ''), getattr(r, 'request', ''))
    return r