day 35

s15day34 进程 
内容回顾:
    1. GIL锁 
    2. 进程和线程的区别?
        第一:
            进程是cpu资源分配的最小单元。
            线程是cpu计算的最小单元。
        第二:
            一个进程中能够有多个线程。
        第三:
            对于Python来讲,它的进程和线程和其他语言有差别,是有GIL锁。
            GIL锁保证一个进程中同一时刻只有一个线程被cpu调度。
            
        注意:IO密集型操作可以使用多线程;计算密集型可以使用多进程;
    
    3. Lock和RLock
    
    4. 线程池 
        
    5. threading.local 
    
    6. 常用方法
    
    7. 面向对象补充:
        class Foo(object):

            def __init__(self):
                object.__setattr__(self, 'info', {}) # 在对象中设置值的本质

            def __setattr__(self, key, value):
                self.info[key] = value

            def __getattr__(self, item):
                print(item)
                return self.info[item]

        obj = Foo()
        obj.name = 'alex'
        print(obj.name)

        
        
今日内容:
    1. 进程
    2. 数据共享
    3. 锁 
    4. 进程池 
    5. 模块(爬虫)
        - requests 
        - bs4(beautifulsoup)
    6. 协程
    
内容详细:
    1. 进程
        - 进程间数据不共享
            data_list = []
            def task(arg):
                data_list.append(arg)
                print(data_list)


            def run():
                for i in range(10):
                    p = multiprocessing.Process(target=task,args=(i,))
                    # p = threading.Thread(target=task,args=(i,))
                    p.start()

            if __name__ == '__main__':
                run()
        - 常用功能: 
            - join
            - daemon
            - name
            - multiprocessing.current_process()
            - multiprocessing.current_process().ident/pid
        
        - 类继承方式创建进程
            class MyProcess(multiprocessing.Process):

                def run(self):
                    print('当前进程',multiprocessing.current_process())


            def run():
                p1 = MyProcess()
                p1.start()

                p2 = MyProcess()
                p2.start()

            if __name__ == '__main__':
                run()
                
    2. 进程间数据共享
        Queue:
            linux:
                q = multiprocessing.Queue()

                def task(arg,q):
                    q.put(arg)

                def run():
                    for i in range(10):
                        p = multiprocessing.Process(target=task, args=(i, q,))
                        p.start()

                    while True:
                        v = q.get()
                        print(v)

                run()
            windows:    
                def task(arg,q):
                    q.put(arg)

                if __name__ == '__main__':
                    q = multiprocessing.Queue()
                    for i in range(10):
                        p = multiprocessing.Process(target=task,args=(i,q,))
                        p.start()
                    while True:
                        v = q.get()
                        print(v)
    
        Manager:(*)
            Linux:
                m = multiprocessing.Manager()
                dic = m.dict()

                def task(arg):
                    dic[arg] = 100

                def run():
                    for i in range(10):
                        p = multiprocessing.Process(target=task, args=(i,))
                        p.start()

                    input('>>>')
                    print(dic.values())
                    
                if __name__ == '__main__':
                    
                    run()
            windows:
                def task(arg,dic):
                    time.sleep(2)
                    dic[arg] = 100

                if __name__ == '__main__':
                    m = multiprocessing.Manager()
                    dic = m.dict()

                    process_list = []
                    for i in range(10):
                        p = multiprocessing.Process(target=task, args=(i,dic,))
                        p.start()

                        process_list.append(p)

                    while True:
                        count = 0
                        for p in process_list:
                            if not p.is_alive():
                                count += 1
                        if count == len(process_list):
                            break
                    print(dic)
    
    3. 进程锁 
            import time
            import threading
            import multiprocessing


            lock = multiprocessing.RLock()

            def task(arg):
                print('鬼子来了')
                lock.acquire()
                time.sleep(2)
                print(arg)
                lock.release()

            if __name__ == '__main__':
                p1 = multiprocessing.Process(target=task,args=(1,))
                p1.start()

                p2 = multiprocessing.Process(target=task, args=(2,))
                p2.start()
    
        为何要加锁?
    
    4. 进程池
        import time
        from concurrent.futures import ThreadPoolExecutor,ProcessPoolExecutor

        def task(arg):
            time.sleep(2)
            print(arg)

        if __name__ == '__main__':

            pool = ProcessPoolExecutor(5)
            for i in range(10):
                pool.submit(task,i)

    5. 初识爬虫:
        安装:
            pip3 install requests 
            pip3 install beautifulsoup4 
        
        问题:
            找不到内部指令?
                方式一:
                    C:\Users\Administrator\AppData\Local\Programs\Python\Python36\Scripts\pip3  install requests 
                方式二:
                    C:\Users\Administrator\AppData\Local\Programs\Python\Python36\Scripts
                    
                    pip3  install requests 
                
        
        示例:
            import requests
            from bs4 import BeautifulSoup
            from concurrent.futures import ThreadPoolExecutor,ProcessPoolExecutor


            # 模拟浏览器发送请求
            # 内部建立 sk = socket.socket()
            # 和抽屉进行socket链接 sk.connect(...)
            # sk.sendall('...')
            # sk.recv(...)

            def task(url):
                print(url)
                r1 = requests.get(
                    url=url,
                    headers={
                        'User-Agent':'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.92 Safari/537.36'
                    }
                )

                # 查看下载下来的文本信息
                soup = BeautifulSoup(r1.text,'html.parser')
                print(soup.text)
                # content_list = soup.find('div',attrs={'id':'content-list'})
                # for item in content_list.find_all('div',attrs={'class':'item'}):
                #     title = item.find('a').text.strip()
                #     target_url = item.find('a').get('href')
                #     print(title,target_url)

            def run():
                pool = ThreadPoolExecutor(5)
                for i in range(1,50):
                    pool.submit(task,'https://dig.chouti.com/all/hot/recent/%s' %i)


            if __name__ == '__main__':
                run()
    
        相关:
            a. 以上示例进程和线程哪个好?
                - 线程好 
            b. requests模块模拟浏览器发送请求
                - 本质 requests.get(...):
                    - 创建socket客户端
                    - 连接 【阻塞】
                    - 发送请求
                    - 接收响应【阻塞】
                    - 断开连接 
                
            c. 线程和进程池 
            
    
重点总结:
    1. 进程    *****
        - windows 
        - linux 
    2. 进程数据共享 *****
        - Queue
        - Manager
    3. 进程锁   ***
    4. 进程池    *****
    5. 爬虫(进程池/线程池的应用)

 

# by luffycity.com
"""
面向对象补充
"""

"""
class Foo(object):
    def __init__(self):
        self.info = {}

    def __setitem__(self, key, value):
        self.info[key] = value

    def __getitem__(self, item):
        return self.info.get(item)


obj = Foo()
obj['x'] = 123
print(obj['x'])
"""
from flask import globals
# class Foo(object):
#
#     def __init__(self):
#         object.__setattr__(self, 'info', {}) # 在对象中设置值的本质
#
#     def __setattr__(self, key, value):
#         self.info[key] = value
#
#     def __getattr__(self, item):
#         print(item)
#         return self.info[item]
#
# obj = Foo()
# obj.name = 'alex'
# print(obj.name)
# Build the list of integers 0..9999 directly from the range instead of
# appending one element at a time, then print it.
v = list(range(10000))

print(v)
View Code
# by luffycity.com
import multiprocessing
import threading




# ##################### 进程间的数据不共享 #####################
"""
data_list = []

def task(arg):
    data_list.append(arg)
    print(data_list)


def run():
    for i in range(10):
        p = multiprocessing.Process(target=task,args=(i,))
        # p = threading.Thread(target=task,args=(i,))
        p.start()

if __name__ == '__main__':
    run()
"""
# ##################### 进程经常使用功能 #####################
"""
import time
def task(arg):
    time.sleep(2)
    print(arg)


def run():
    print('111111111')
    p1 = multiprocessing.Process(target=task,args=(1,))
    p1.name = 'pp1'
    p1.start()
    print('222222222')

    p2 = multiprocessing.Process(target=task, args=(2,))
    p2.name = 'pp2'
    p2.start()
    print('333333333')

if __name__ == '__main__':
    run()
"""

# ##################### 经过继承方式建立进程 #####################

class MyProcess(multiprocessing.Process):
    """Process subclass whose work is defined by overriding ``run``."""

    def run(self):
        """Print a label followed by the process object running this method."""
        running = multiprocessing.current_process()
        print('当前进程', running)


def run():
    """Create and start two ``MyProcess`` workers, one after the other."""
    for _ in range(2):
        # Each worker is started right after it is created, before the next
        # one is constructed.
        worker = MyProcess()
        worker.start()

# Only launch the worker processes when this file is executed as a script.
if __name__ == '__main__':
    run()
# by luffycity.com
import multiprocessing
import threading
import queue
import time
# ##################### 进程间的数据共享:multiprocessing.Queue #####################
"""
q = multiprocessing.Queue()

def task(arg,q):
    q.put(arg)


def run():
    for i in range(10):
        p = multiprocessing.Process(target=task, args=(i, q,))
        p.start()

    while True:
        v = q.get()
        print(v)
run()
"""
# ##################### 进程间的数据共享:Manager #####################
"""
def task(arg,dic):
    time.sleep(2)
    dic[arg] = 100

if __name__ == '__main__':
    m = multiprocessing.Manager()
    dic = m.dict()
    process_list = []
    for i in range(10):
        p = multiprocessing.Process(target=task, args=(i,dic,))
        p.start()

        process_list.append(p)

    while True:
        count = 0
        for p in process_list:
            if not p.is_alive():
                count += 1
        if count == len(process_list):
            break
    print(dic)
    # ...
"""
# ##################### 进程间的数据其余电脑 #####################
"""
def task(arg,dic):
    pass

if __name__ == '__main__':
    while True:
        # 链接上指定的服务器
        # 去机器上获取url
        url = 'adfasdf'
        p = multiprocessing.Process(target=task, args=(url,))
        p.start()

"""

 

# by luffycity.com
import time
import threading
import multiprocessing


# Module-level lock, kept for code that refers to it by name.  Under the
# "spawn" start method every child re-imports this module and would build its
# own unrelated lock, so the parent passes this one to the children explicitly.
lock = multiprocessing.RLock()


def task(arg, shared_lock=None):
    """Print a banner, then print ``arg`` while holding the lock.

    Args:
        arg: Value printed inside the critical section.
        shared_lock: Lock guarding the critical section.  When omitted, the
            module-level ``lock`` is used.
    """
    guard = lock if shared_lock is None else shared_lock
    print('鬼子来了')
    # ``with`` releases the lock even if the body raises.
    with guard:
        time.sleep(2)
        print(arg)


if __name__ == '__main__':
    workers = [
        multiprocessing.Process(target=task, args=(number, lock))
        for number in (1, 2)
    ]
    for worker in workers:
        worker.start()
    # Wait for both children so the script ends only after they have finished.
    for worker in workers:
        worker.join()
# by luffycity.com
import time
from concurrent.futures import ThreadPoolExecutor,ProcessPoolExecutor

def task(arg):
    """Wait two seconds, then print ``arg``."""
    delay_seconds = 2
    time.sleep(delay_seconds)
    print(arg)

if __name__ == '__main__':
    # The context manager shuts the pool down once every submitted job has
    # finished, so the worker processes are always cleaned up.
    with ProcessPoolExecutor(5) as pool:
        for i in range(10):
            pool.submit(task, i)
# by luffycity.com
import requests
from bs4 import BeautifulSoup
from concurrent.futures import ThreadPoolExecutor,ProcessPoolExecutor


# 模拟浏览器发送请求
# 内部建立 sk = socket.socket()
# 和抽屉进行socket链接 sk.connect(...)
# sk.sendall('...')
# sk.recv(...)

def task(url):
    """Download ``url`` with a browser-like User-Agent and print the page text.

    Args:
        url: Address of the page to fetch.
    """
    print(url)
    r1 = requests.get(
        url=url,
        headers={
            'User-Agent':'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.92 Safari/537.36'
        },
        # Without a timeout, requests waits indefinitely on an unresponsive
        # server, which would tie up a pool worker forever.
        timeout=10,
    )

    # Parse the downloaded HTML and print its text content.
    soup = BeautifulSoup(r1.text,'html.parser')
    print(soup.text)
    # Example of extracting each item's title and link from the page:
    # content_list = soup.find('div',attrs={'id':'content-list'})
    # for item in content_list.find_all('div',attrs={'class':'item'}):
    #     title = item.find('a').text.strip()
    #     target_url = item.find('a').get('href')
    #     print(title,target_url)

def run():
    """Fetch listing pages 1 through 49 using a pool of five threads.

    Returns once every submitted download has finished.
    """
    # Leaving the ``with`` block shuts the executor down after all jobs finish,
    # so its threads are not left for interpreter exit to clean up.
    with ThreadPoolExecutor(5) as pool:
        for i in range(1, 50):
            pool.submit(task, 'https://dig.chouti.com/all/hot/recent/%s' % i)


# Only start the crawl when this file is executed as a script.
if __name__ == '__main__':
    run()

 

 

# by luffycity.com
import multiprocessing
import time

def task(arg, dic):
    """Wait two seconds, then store 100 in ``dic`` under the key ``arg``."""
    delay_seconds, stored_value = 2, 100
    time.sleep(delay_seconds)
    dic[arg] = stored_value


if __name__ == '__main__':
    m = multiprocessing.Manager()
    # A plain dict is copied into each child, so the children's writes never
    # reach the parent; a manager-backed dict proxy is shared between them.
    dic = m.dict()

    process_list = []
    for i in range(10):
        p = multiprocessing.Process(target=task, args=(i, dic,))
        p.start()
        process_list.append(p)

    # Wait for every child before reading the results; the manager has to stay
    # alive while the children are still writing to the shared dict.
    for p in process_list:
        p.join()

    print(dic)
    print('end')
相关文章
相关标签/搜索