<small id='Z4CdQO'></small> <noframes id='omHZK'>

  • <tfoot id='oW2wYK3'></tfoot>

      <legend id='db34Z'><style id='V9xOrlD'><dir id='9CI1P'><q id='t7Fc'></q></dir></style></legend>
      <i id='zsPat4KmxS'><tr id='A20ukUga'><dt id='KfEmsipMR2'><q id='9EOUGJnjt'><span id='yJbTX1j6k'><b id='8RkVZK'><form id='YpVD'><ins id='zWYy6Hvdl'></ins><ul id='JktuXTp'></ul><sub id='iMtg6'></sub></form><legend id='0VW3sciaj'></legend><bdo id='8JRA'><pre id='xQKvp4'><center id='CvWt5OxFkJ'></center></pre></bdo></b><th id='Ie6ho5'></th></span></q></dt></tr></i><div id='bMhJ'><tfoot id='16p5C'></tfoot><dl id='dnqZ5XCB'><fieldset id='5X4lRx'></fieldset></dl></div>

          <bdo id='zogc70'></bdo><ul id='POyqA69'></ul>

          1. <li id='PQGwCy'></li>
            登陆

            python多线程爬取youtube视频,外面的世界很精彩

            admin 2019-09-06 201人围观 ,发现0个评论

            开发环境:

            • python2.7 + win10

            开始先说一下,访问youtube需要那啥的,请自行解决,最好是全局代理。

            实现代码:

            import Queue
            import hashlib
            import json
            import logging
            import os
            import random
            import re
            import sys
            import threading
            import urllib

            import lxml
            import requests
            from bs4 import BeautifulSoup
            from requests.exceptions import (
                ChunkedEncodingError,
                ConnectionError,
                ConnectTimeout,
                MissingSchema,
                ReadTimeout,
                SSLError,
            )
            '''
            遇到不明白的问题?Python学习沟通群:821460695满意你的需求,材料都现已上传群文件,能够自行下载!
            '''
            # Python 2 only: make GBK the implicit str<->unicode codec.
            # reload(sys) is needed because setdefaultencoding is not reachable on
            # the already-imported module. NOTE(review): presumably chosen to match
            # a Chinese Windows console -- confirm before porting.
            reload(sys)
            sys.setdefaultencoding('gbk')

            # Logging: timestamped records written to stdout at INFO and above.
            logger = logging.getLogger("AppName")
            formatter = logging.Formatter('%(asctime)s %(levelname)-5s: %(message)s')
            console_handler = logging.StreamHandler(sys.stdout)
            console_handler.setFormatter(formatter)
            logger.addHandler(console_handler)
            logger.setLevel(logging.INFO)

            # Work queues shared by the worker threads.
            q = Queue.Queue()       # [video_url, title] pairs waiting to be downloaded
            page_q = Queue.Queue()  # search-result page numbers waiting to be scraped
            def _lookup_video(hash_v, url):
                """Ask the primary API, then its mirror, for the metadata of *url*.

                Returns the decoded JSON payload, or None when both endpoints fail
                with an SSL error or answer with something that is not JSON.
                """
                query = "/api/video/?cached&hash=" + hash_v + "&video=" + url
                for host in ("https://steakovercooked.com", "https://helloacm.com"):
                    try:
                        return json.loads(requests.get(host + query).text)
                    except (ValueError, SSLError):
                        # fall through to the next endpoint
                        pass
                return None


            def _save_video(path, name, content):
                """Write *content* to ``<path>/<name>.mp4``; return the name actually used.

                When the first open/write fails with IOError the data is written
                under a randomly numbered placeholder name instead (presumably the
                title contained characters the filesystem rejects).
                """
                target = os.path.join(path, name + ".mp4")
                try:
                    with open(target, 'wb') as f:
                        f.write(content)
                except IOError:
                    name = u'好开心么么哒 %s' % random.randint(1, 9999)
                    target = os.path.join(path, name + ".mp4")
                    with open(target, 'wb') as f:
                        f.write(content)
                return name


            def _download_one(hash_v, url, name, path):
                """Resolve and download a single video; return True on success."""
                info = _lookup_video(hash_v, url)
                if info is None:
                    return False
                try:
                    vurl = info['url']
                except (KeyError, TypeError):
                    # the API answered, but without a download link
                    return False
                logger.info(u"正在下载:%s" % name)
                try:
                    r = requests.get(vurl)
                except SSLError:
                    # one blind retry on SSL trouble
                    r = requests.get(vurl)
                except MissingSchema:
                    # the returned link is not a usable URL
                    return False
                name = _save_video(path, name, r.content)
                logger.info(u"下载完结:%s" % name)
                return True


            def downlaod(q,x,path):
                """Worker loop: take ``[url, title]`` items from *q* and save each as an mp4.

                Args:
                    q: queue of ``[video_url, title]`` items; ``task_done()`` is called
                        exactly once for every item taken, even when it is skipped.
                    x: worker index supplied by the caller; not used.
                    path: directory the ``.mp4`` files are written to.

                The function first scrapes the ``hash`` token that the video API
                requires, then loops forever, so it is meant to run in a daemon thread.
                A failed item is skipped; it no longer terminates the worker, which
                previously could leave ``q.join()`` waiting on items nobody would take.

                Raises:
                    IndexError: if the token page does not contain ``var hash="..."``.
                """
                urlhash = "https://weibomiaopai.com/"
                try:
                    html = requests.get(urlhash).text
                except SSLError:
                    logger.info(u"网络不稳定 正在重试")
                    html = requests.get(urlhash).text
                hash_v = re.findall(r'var hash="(.*?)"', html, re.S)[0]

                while True:
                    data = q.get()
                    try:
                        url = data[0]
                        # "|" is stripped from the title before it is used as a file name
                        name = data[1].strip().replace("|", "")
                        if not _download_one(hash_v, url, name, path):
                            logger.warning(u"skipped: %s", name)
                    finally:
                        q.task_done()
            # Video ids embedded in a search-result page.
            _WATCH_ID_RE = re.compile(r'"url":"/watch\?v=(.*?)","webPageType"', re.S)
            # Page title up to the "YouTube" suffix.
            # NOTE(review): the published pattern was "(.*?)YouTube" with no anchor,
            # which captures everything from the start of the document; the "<title>"
            # tag was presumably lost when the article was rendered -- confirm.
            _TITLE_RE = re.compile(r"<title>(.*?)YouTube", re.S)
            # Errors after which a watch page is retried once and then given up on.
            _RETRYABLE = (ConnectionError, ChunkedEncodingError, SSLError)


            def _contains_chinese(text):
                """Return True when *text* has a character in U+4E00..U+9FFF."""
                return any(u'\u4e00' <= ch <= u'\u9fff' for ch in text)


            def _fetch_watch_page(vurl):
                """Download a watch page, retrying once; return its HTML or None."""
                try:
                    return requests.get(vurl).text
                except _RETRYABLE:
                    logger.info(u"网络不稳定 正在重试")
                try:
                    return requests.get(vurl).text
                except _RETRYABLE:
                    return None


            def _translated_title(name):
                """Look *name* up on iciba.com and return the third line of the result box.

                Falls back to a randomly numbered placeholder when the page does not
                have the expected ``.clearfix`` element or enough lines in it.
                """
                logger.info(u"正在翻译")
                html2 = requests.get("http://www.iciba.com/" + name).text
                soup = BeautifulSoup(html2, "lxml")
                try:
                    return soup.select('.clearfix')[0].get_text().split("\n")[2]
                except IndexError:
                    return u'好开心么么哒 %s' % random.randint(1, 9999)


            def get_page(keyword,page_q):
                """Worker loop: scrape search-result pages and queue the videos found.

                Args:
                    keyword: search term appended to the YouTube results URL.
                    page_q: queue of page numbers; ``task_done()`` is called once for
                        every page taken, including pages that raise.

                For each video on a page the watch page is fetched to read its title,
                and ``[video_url, title]`` is put on the module-level queue ``q``.
                When *keyword* contains Chinese characters the title is used as is;
                otherwise it is first run through ``_translated_title``. Videos whose
                page cannot be fetched or has no recognisable title are skipped.

                If the search page itself is unreachable the whole process is ended
                with ``os._exit(0)``. The loop never returns, so run it in a daemon
                thread.
                """
                headers = {
                    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:57.0) Gecko/20100101 Firefox/57.0'
                }
                keep_original_title = _contains_chinese(keyword)

                while True:
                    page = page_q.get()
                    try:
                        url = ("https://www.youtube.com/results?sp=EgIIAg%253D%253D&search_query="
                               + keyword + "&page=" + str(page))
                        try:
                            html = requests.get(url, headers=headers).text
                        except (ConnectTimeout, ConnectionError):
                            print(u"不能拜访youtube 查看是否已FQ")
                            os._exit(0)
                        logger.info(u"第 %s 页" % page)

                        for video_id in _WATCH_ID_RE.findall(html):
                            vurl = "https://www.youtube.com/watch?v=" + video_id
                            res = _fetch_watch_page(vurl)
                            if res is None:
                                continue
                            titles = _TITLE_RE.findall(res)
                            if not titles:
                                continue
                            name = titles[0].replace("-", "")
                            if keep_original_title:
                                q.put([vurl, name])
                            else:
                                q.put([vurl, _translated_title(name)])
                    finally:
                        page_q.task_done()


            def main():
                """Prompt for a keyword and thread count, then scrape and download.

                Pages 1-25 of the search results are scraped by ``get_page`` workers;
                once that queue drains, the collected videos are fetched by
                ``downlaod`` workers into a per-keyword directory.
                """
                # Console input is decoded from GBK (Python 2 raw_input returns bytes).
                keyword = raw_input(u"请输入关键字:").decode("gbk")
                # At least one worker, otherwise the join() calls below never return.
                threads = max(1, int(raw_input(u"请输入线程数量(主张1-10): ")))

                # Output directory: D:\youtube\<keyword>
                path = r'D:\youtube\%s' % keyword
                if not os.path.exists(path):
                    os.makedirs(path)

                # Stage 1: scrape the search-result pages.
                logger.info(u"开端解析网页")
                for page in range(1, 26):
                    page_q.put(page)
                for _ in range(threads):
                    t = threading.Thread(target=get_page, args=(keyword, page_q))
                    t.daemon = True
                    t.start()
                page_q.join()
                logger.info(u"共 %s 视频" % q.qsize())

                # Stage 2: download the collected videos.
                logger.info(u"开端下载视频")
                for x in range(threads):
                    t = threading.Thread(target=downlaod, args=(q, x, path))
                    t.daemon = True
                    t.start()
                q.join()
                logger.info(u"悉数视频下载完结!")


            if __name__ == "__main__":
                main()
            请关注微信公众号
            微信二维码
            不容错过
            Powered By Z-BlogPHP