Python crawler, example 1: crawling Google Play search suggestions

Crawl the suggestion words that Google Play offers for a search keyword.

# 需要的插件
pip install requests "requests[socks]"


#! /usr/bin/env python
# -*- coding:utf-8 -*-


import urllib
import requests
import sys
import json
import os.path


# 自动查找系统上安装的证书;添加实际的路径.
def check_ca_cert():
    """Pick the value to pass as the ``verify`` argument of a requests call.

    Returns:
        The first path from the candidate list that exists on this machine,
        or ``True`` when none of them exists, so that requests still verifies
        the server certificate against its own default CA bundle.
    """
    # Candidate CA bundle locations; add the actual path of your system here.
    ca_certs = (
        "/etc/pki/tls/certs/ca-bundle.crt",
        "/etc/ssl/certs/ca-certificates.crt",
    )
    for path in ca_certs:
        if os.path.exists(path):
            return path
    # Returning False here would be forwarded as verify=False and silently
    # switch certificate verification off; True keeps it enabled.
    return True


def get_lenovo_word(key_word, proxies=None, timeout=10):
    """Fetch the suggestion words returned for ``key_word``.

    Args:
        key_word: Search text; it is URL-encoded into the ``query`` parameter
            of the suggest URL.
        proxies: Optional proxies mapping, handed to ``requests.get`` as is.
        timeout: Seconds to wait for the server before giving up, so that a
            dead proxy or host cannot block the call indefinitely.

    Returns:
        A list holding the ``"s"`` field of every entry in the JSON response,
        or ``None`` when the request fails, the status code is not 200, the
        body is empty, or the body does not have the expected structure.
    """
    # Function-scope import so this block runs on both Python 2 and Python 3.
    try:
        from urllib import quote_plus  # Python 2
    except ImportError:
        from urllib.parse import quote_plus  # Python 3

    url = ("https://market.android.com/suggest/SuggRequest"
           "?json=1&c=3&query={key_word}&hl=zh&gl=CN").format(
               key_word=quote_plus(key_word))
    print("URL: %s" % url)

    # Network failures are reported separately from parse failures, and only
    # request errors are caught, so Ctrl-C and programming errors propagate.
    try:
        resp = requests.get(url, proxies=proxies, verify=check_ca_cert(),
                            timeout=timeout)
    except requests.RequestException as e:
        print("Net Error! %s" % e)
        return None

    if resp.status_code != 200:
        print("Net Error!")
        return None

    text = resp.text
    if not text:
        print("Net Get None!")
        return None

    # ValueError: body is not JSON; KeyError/TypeError: entries are not
    # mappings carrying an "s" key. A malformed body yields None rather than
    # a partially filled list.
    try:
        return [val["s"] for val in json.loads(text)]
    except (ValueError, KeyError, TypeError) as e:
        print("parse json exception: %s" % e)
        return None


# Script entry point: print every suggestion word for the keyword given on the
# command line, one per line.
if __name__ == '__main__':
    if len(sys.argv) < 2:
        print("Usage:{myname} <key word>".format(myname=sys.argv[0]))
        # sys.exit() rather than exit(): the latter is a helper injected by
        # the site module for interactive sessions and may be unavailable.
        sys.exit(1)
    # The search term is the first command-line argument.
    key_word = sys.argv[1]
    # Send both http and https traffic through a local SOCKS5 proxy.
    my_proxies = {
        "https": "socks5://127.0.0.1:1080",
        "http": "socks5://127.0.0.1:1080"
    }
    word_list = get_lenovo_word(key_word, my_proxies)
    # None (failure) and an empty list both print nothing.
    if word_list:
        for word in word_list:
            print(word)



(141)

发表评论

电子邮件地址不会被公开。 必填项已用*标注

此站点使用Akismet来减少垃圾评论。了解我们如何处理您的评论数据