蜘蛛蟒

fzutils的Python项目详细描述


███████╗███████╗██╗   ██╗████████╗██╗██╗     ███████╗
██╔════╝╚══███╔╝██║   ██║╚══██╔══╝██║██║     ██╔════╝
█████╗    ███╔╝ ██║   ██║   ██║   ██║██║     ███████╗
██╔══╝   ███╔╝  ██║   ██║   ██║   ██║██║     ╚════██║
██║     ███████╗╚██████╔╝   ██║   ██║███████╗███████║
╚═╝     ╚══════╝ ╚═════╝    ╚═╝   ╚═╝╚══════╝╚══════╝                                                   

Build Status | GitHub license | GitHub forks | GitHub stars | Twitter

fzutils

这是什么?

这是fz的python utils包, for Spider.

旨在: 高效快速的进行爬虫开发的集成包

Install

pip3 install fzutils

要求

simple use

from fzutils.ip_pools import (
    IpPools,
    ip_proxy_pool,
    fz_ip_pool,)

# 高匿
# type默认是ip_proxy_pool, 可修改为fz_ip_pool, 具体看你使用哪个ip池
ip_obj = IpPools(type=ip_proxy_pool, high_conceal=True)
# 得到一个随机ip, eg: 'http://175.6.2.174:8088'
proxy = ip_obj._get_random_proxy_ip()
from fzutils.spider.crawler import Crawler, AsyncCrawler
from fzutils.ip_pools import fz_ip_pool

class ASpider(Crawler):     # Crawler为爬虫基类
    def __init__(self, logger=None) -> None:
        super(ASpider, self).__init__(
            ip_pool_type=fz_ip_pool,
            log_print=True,
            logger=logger,
            log_save_path='log文件存储path',
            is_use_driver=True,
            driver_executable_path='驱动path',
        )

class BSpider(AsyncCrawler):
    """异步爬虫"""
    pass

_ = ASpider()
from fzutils.spider.fz_driver import BaseDriver, PHANTOMJS
from fzutils.ip_pools import ip_proxy_pool

# ip_pool_type默认也是ip_proxy_pool
# BaseDriver支持phantomjs, chromedriver, firefoxdriver
_ = BaseDriver(type=PHANTOMJS, executable_path='xxx', ip_pool_type=ip_proxy_pool)
exec_code = '''
js = 'document.body.scrollTop=10000'
self.driver.execute_script(js)
'''
body = _.get_url_body(url='xxx', exec_code=exec_code)
from fzutils.spider.fz_requests import Requests
from fzutils.ip_pools import ip_proxy_pool

# ip_pool_type默认也是ip_proxy_pool
body = Requests.get_url_body(method='get', url='xxx', ip_pool_type=ip_proxy_pool)
import asyncio
from fzutils.spider.fz_aiohttp import AioHttp

async def tmp():
    _ = AioHttp(max_tasks=5)
    return await _.aio_get_url_body(url='xxx', headers={})
from fzutils.time_utils import (
    fz_set_timeout,
    fz_timer,)
from time import sleep
import sys

# 设置执行超时
@fz_set_timeout(2)
def tmp():
    sleep(3)

# 计算函数用时, 支持sys.stdout.write or logger.info
@fz_timer(print_func=sys.stdout.write)
def tmp_2():
    sleep(3)

tmp()
tmp_2()
from fzutils.log_utils import set_logger
from logging import INFO, ERROR

logger = set_logger(log_file_name='path', console_log_level=INFO, file_log_level=ERROR)
from fzutils.auto_ops_utils import auto_git

# 自动化git
auto_git(path='xxx/path')
from fzutils.path_utils import cd

# cd 到目标上下文并进行其他操作
with cd('path'):
    pass
from fzutils.sql_utils import (
    BaseSqlServer,
    pretty_table,)

_ = BaseSqlServer(host='host', user='user', passwd='passwd', db='db', port='port')
# db美化打印
pretty_table(cursor=_._get_one_select_cursor(sql_str='sql_str', params=('some_thing',)))
from fzutils.linux_utils import (
    kill_process_by_name,
    process_exit,)

# 根据process_name kill process
kill_process_by_name(process_name='xxxx')
# 根据process_name 判断process是否存在
process_exit(process_name='xxxx')
from fzutils.linux_utils import daemon_init

def run_forever():
    pass

# 守护进程
daemon_init()
run_forever()
from fzutils.internet_utils import (
    get_random_pc_ua,
    get_random_phone_ua,)

# 随机user-agent
pc_user_agent = get_random_pc_ua()
phone_user_agent = get_random_phone_ua()
from fzutils.common_utils import _print

# 支持sys.stdout.write or logger
_print(msg='xxx', logger=logger, exception=e, log_level=2)
from fzutils.auto_ops_utils import (
    upload_or_download_files,
    local_compress_folders,
    remote_decompress_folders,)
from fabric.connection import Connection

connect_obj = Connection()
# local 与 server端 上传或下载文件
upload_or_download_files(
    method='put',
    connect_object=connect_obj,
    local_file_path='/Users/afa/myFiles/tmp/my_spider_logs.zip',
    remote_file_path='/root/myFiles/my_spider_logs.zip')
# 本地压缩文件夹为zip文件
local_compress_folders(
    father_folders_path='/Users/afa/myFiles',
    folders_name='my_spider_logs',
    default_save_path='xxxxxx')
# 远程解压zip文件
remote_decompress_folders(
    connect_object=connect_obj,
    folders_path='/root/myFiles/my_spider_logs.zip',
    target_decompress_path='/root/myFiles/')
from fzutils.common_utils import json_2_dict

# json转dict, 处理部分不规范json
_dict = json_2_dict(json_str='json_str', logger=logger, encoding='utf-8')
from fzutils.auto_ops_utils import judge_whether_file_exists
from fabric.connection import Connection

connect_obj = Connection()
# 判断server文件是否存在
result = judge_whether_file_exists(connect_object=connect_obj, file_path='file_path')
from fzutils.email_utils import FZEmail

_ = FZEmail(user='xxx', passwd='密码 or smtp授权码')
_.send_email(to=['xxx@gmail.com',], subject='邮件正文', text='邮件内容')
from requests import sessions
from fzutils.common_utils import (
    save_obj,
    get_obj,)

s = sessions()
# 对象持久化存储
save_obj(s, 's.txt')
get_obj('s.txt')
from fzutils.data.str_utils import (
    char_is_chinese,
    char_is_alphabet,
    char_is_number,
    char_is_other,)

# 单字符判断其类型
print(char_is_chinese('你'))
print(char_is_alphabet('a'))
print(char_is_number('1'))
print(char_is_other('_'))
from fzutils.algorithm_utils import merge_sort

# 归并排序
print(merge_sort([-1, 2, 1]))
# 还有很多其他排序方法
from fzutils.data.pickle_utils import deserializate_pickle_object
from pickle import dumps

a = dumps({'1': 1,})
# 反序列化python对象
print(deserializate_pickle_object(a))
from fzutils.aio_utils import get_async_execute_result

# 获取异步执行结果
res = get_async_execute_result(obj='xxx类', obj_method_name='xxx类方法',)
from fzutils.common_utils import retry

def validate_res(res):
    '''验证结果的函数'''
    if res == 5:
        return True
    else:
        return False

# 重试装饰器
@retry(max_retries=4, validate_func=validate_res)
def a(t):
    return t-2

print(a(7))

curl

curl cmd 转 python 代码

from fzutils.curl_utils import curl_cmd_2_py_code

# 使用前提(已安装: npm install --save curlconverter)
curl_cmd = "curl 'http://en.wikipedia.org/' -H 'Accept-Encoding: gzip, deflate, sdch' -H 'Accept-Language: en-US,en;q=0.8' -H 'User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36' -H 'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8' -H 'Referer: http://www.wikipedia.org/' -H 'Cookie: GeoIP=US:Albuquerque:35.1241:-106.7675:v4; uls-previous-languages=%5B%22en%22%5D; mediaWiki.user.sessionId=VaHaeVW3m0ymvx9kacwshZIDkv8zgF9y; centralnotice_buckets_by_campaign=%7B%22C14_enUS_dsk_lw_FR%22%3A%7B%22val%22%3A%220%22%2C%22start%22%3A1412172000%2C%22end%22%3A1422576000%7D%2C%22C14_en5C_dec_dsk_FR%22%3A%7B%22val%22%3A3%2C%22start%22%3A1417514400%2C%22end%22%3A1425290400%7D%2C%22C14_en5C_bkup_dsk_FR%22%3A%7B%22val%22%3A1%2C%22start%22%3A1417428000%2C%22end%22%3A1425290400%7D%7D; centralnotice_bannercount_fr12=22; centralnotice_bannercount_fr12-wait=14' -H 'Connection: keep-alive' --compressed"
res = curl_cmd_2_py_code(curl_cmd)

ocr识别

from fzutils.ocr_utils import (
    baidu_ocr_captcha,
    baidu_orc_image_main_body,
    get_tracks_based_on_distance,
    dichotomy_match_gap_distance,)

# 百度ocr识别captcha
captcha = baidu_ocr_captcha(
    app_id='xx',
    api_key='xx',
    secret_key='xx',
    img_path='图片地址',
    orc_type=2)
# 百度ocr识别图片主体内容位置
img_url = 'https://www.baidu.com/link?url=phUVHvSMIfwj2DPXnprj0BTv4loPocnLfNn-CVb7UQE4NLe7PH8GbrYKDkX2hzyp17Eqhy-s1rP8Zg92NEt0vqUxm_nhLoyRTaaxMFwq1oMdPaG_krazDsxHgLlql9QkZB92VhsTirtG53MvyecIFLjWeHjdyGCyTOaS-UcksfOJkPFOAJOFe4AoCxW5qQUbTahhjhjXWyihP-XmYIR5z-Gt3esBvFJpuHhUy7W6OODMrUZ2v7mUa9ng2BFKDy2MREyZQcXW80D3eDqWbIFLQ5BtEqWEknWa_1kxKXf4qo7GAZjkANyTP8D2PN0jHRw2AiWtN3d57J6GP4hksByVAzwIJWeWIiObv69Q1ekb2O_WsYLbKfzIsVLdlZGm5SHXnMgKZkRay_I8NKeq-wUb2wLKsGCjhRC1AV-GSv5Q7fIEj1QrSgQjLnW6Fjh55M5AaM9JRJLlXWhANegCn6jpJhnL7vcV1-kDgUcKQVFNq27fol2E2fG-d7ja03dizHCawAsIr6ortoWeqDdpyW4VOesI1VU6_WDdAWs96KZqVD2gATBs1U_D5nbYC9DAuZYK&wd=&eqid=81209347000143bf000000035b933e62'
res = baidu_orc_image_main_body(img_url=img_url)
# 根据给定距离生成仿生移动轨迹
tracks = get_tracks_based_on_distance(distance=100)
# 二分法匹配滑块与缺口间的距离
distance = dichotomy_match_gap_distance(bg_img_path='xxx', slide_img_path='xxx')

qrcode

二维码解码

from fzutils.qrcode_utils import decode_qrcode

img_url = 'https://i.loli.net/2018/11/15/5bed1adce184e.jpg'
print(decode_qrcode(img_url=img_url))

批量注册账号

from pprint import pprint
from fzutils.register_utils import YiMaSmser

_ = YiMaSmser(username='账号', pwd='密码')
project_id = 715
while True:
    # 获取新手机号
    phone_num = _._get_phone_num(project_id=project_id)
    print(phone_num)
    a = input('是否可用: ')
    if a == 'y':
        break

print('\n未注册的: {}'.format(phone_num))
# 获取该手机号的短信
sms_res = _._get_sms(phone_num=phone_num, project_id=project_id)
print(sms_res)
# 查看自己的账户余额
money_res = _._get_account_info()
pprint(money_res)
from time import time, sleep
from fzutils.register_utils import TwentyFourEmail

_ = TwentyFourEmail()
email_address = _._get_email_address()
print('获取到的email_address: {}'.format(email_address))
# # 换个邮箱
# email_address = _._get_new_email_address()
# print(email_address)
message_count = lambda: _._get_email_message_count()
start_time = time()
index = 1
while message_count() in (0, None) and time() - start_time < 100.:
    sleep_time = 2
    print('{} try, 休眠{}s...'.format(index, sleep_time))
    sleep(sleep_time)
    index += 1

message_list = _._get_email_message_list()
print(message_list)

代码模板生成

from fzutils.spider.auto import auto_generate_crawler_code

# 爬虫基本代码自动生成器
auto_generate_crawler_code()
"""
shell输出如下:
#--------------------------------
# 爬虫模板自动生成器 by super_fazai
#--------------------------------
@@ 下面是备选参数, 无输入则取默认值!!
请输入author:super_fazai
请输入email:superonesfazai@gmail.com
请输入创建的文件名(不含.py):fz_spider_demo
请输入class_name:FZSpiderDemo

创建爬虫文件fz_spider_demo.py完毕!
enjoy!?
"""
# 还有很多其他常用函数, 待您探索...

资源

fzutils的home < https://www.github.com/superonesfazai/python >

版权和保修

此发行版中的代码为版权所有 (c) super_fazai, 除非另有明确说明.

fzutils根据MIT许可证提供, 包含的LICENSE文件详细描述了这一点.

贡献者

  • super_fazai

作者

super_fazai

<author_email: superonesfazai@gmail.com>

欢迎加入QQ群-->: 979659372 Python中文网_新手群

推荐PyPI第三方库


热门话题
java使用双一元运算符vs函数<Double,Double>   由于存在巨大的休眠,超出了java GC开销限制   java更改Tomcat中Apache文件上载的临时目录   当PDF位于本地驱动器时,javascript无法在IE中显示PDF   java hibernate如何加载瞬态对象?   java如何判断包属于哪个模块?   创建Word文档时java文件已损坏   java如何将Eclipse中的每个开放项目导出为自己的JAR?   java将带有getter和setter的变量添加到现有类中   java高效地发送多封电子邮件   java读/写。具有特殊字符的txt文件   java如何在导出到jar时包含opencv本机库   java Xstream在未完成时停止写入文件   if语句Java:无法检查布尔值是否为null   文本Java:读取txt文件并将其保存在字符串数组中,但不带反斜杠(空格)?   java如何使用正则表达式替换字符串的一部分   通过Java远程运行Powershell脚本   filenames带有xml文件空指针异常的Java文件uri