保留几个问题
- 在 python 中,`$` 无法作为函数名出现,所以作者将其转换:`$` -> `querySelector`
- …
selector
对于固定元素,可以通过chrome自带功能快速获取
小脚本
简易脚本,用于信息收集阶段,批量网页截图,国外有工具aquatone
# coding: utf-8
import asyncio
from pyppeteer import launch
from urllib.parse import urlparse
from optparse import OptionParser
from os import mkdir
from datetime import datetime
def _buildOptionParser():
    """Create the command-line parser used by this script.

    Registers the target-file option, the https/full-page switches and the
    two millisecond timing options, each with the same default as before.
    """
    parser = OptionParser()

    # File listing the target addresses.
    parser.add_option(
        '-f',
        dest="filename",
        action='store',
        type="string",
        default="",
        help=u"目标地址文件路径",
    )

    # Boolean switches; both are off unless given on the command line.
    parser.add_option(
        '--https',
        dest="ssl",
        action='store_true',
        default=False,
        help=u"对无协议地址,是否使用https访问。默认:False",
    )
    parser.add_option(
        '--full-page',
        dest="fullpage",
        action='store_true',
        default=False,
        help=u"全屏截图。默认:False",
    )

    # Integer timing options, expressed in milliseconds.
    parser.add_option(
        '--time-wait',
        dest="timewait",
        action='store',
        type='int',
        default=1000,
        help=u"等待页面加载时间,视网速而定。默认:1000ms",
    )
    parser.add_option(
        '--time-out',
        dest="timeout",
        action='store',
        type='int',
        default=3000,
        help=u"页面超时时间。默认:3000ms",
    )
    return parser


opts = _buildOptionParser()
def getUrlFromFile(filename):
    """Yield the target addresses listed in a text file, one per line.

    Leading and trailing whitespace is stripped from every line. Lines that
    are empty after stripping are skipped, so a trailing newline or a blank
    separator line does not produce an empty target.

    Args:
        filename: Path of the text file to read.

    Yields:
        Each non-empty, stripped line of the file, in file order.

    Raises:
        OSError: If the file cannot be opened (raised on first iteration,
            because this is a generator).
    """
    with open(filename) as file:
        for line in file:
            url = line.strip()
            # An empty entry would only ever be turned into a bare scheme
            # such as "http://" by the consumer, which cannot be loaded.
            if url:
                yield url
def _hasScheme(url):
    """Return True when *url* starts with an explicit ``scheme://`` prefix.

    ``urlparse`` alone is not enough: on Python 3.9+ it reports ``host`` as
    the scheme of a ``host:port`` entry, so the text following the parsed
    scheme is also required to begin with ``://``.
    """
    scheme = urlparse(url)[0]
    return bool(scheme) and url[len(scheme):].startswith("://")


def _screenshotFileName(url):
    """Return *url* with characters that are unsafe in a file name replaced.

    Path separators and the other characters rejected by common file systems
    become underscores, so an entry such as ``http://a.com/x`` maps to a
    single file instead of a path into directories that do not exist.
    """
    return "".join("_" if ch in '\\/:*?"<>|' else ch for ch in url)


async def getScreenshot(urlList, options):
    """Visit every address in *urlList* and save a PNG screenshot of it.

    Args:
        urlList: Iterable of target addresses. Entries without an explicit
            ``scheme://`` prefix get ``https://`` or ``http://`` prepended,
            depending on ``options.ssl``.
        options: Parsed option object. Reads ``ssl``, ``timeout`` (passed to
            ``page.goto``), ``timewait`` (passed to ``page.waitFor``),
            ``fullpage`` and ``path`` (directory the PNG files are written
            to; it must already exist).

    A failure on one address is reported with a warning and does not stop
    the run: the browser is restarted and the next address is processed.
    The browser is closed when the function finishes, including when it is
    interrupted.
    """
    scheme = "https://" if options.ssl else "http://"
    browser = await launch(headless=False)
    page = await browser.newPage()
    try:
        for url in urlList:
            try:
                fullUrl = url if _hasScheme(url) else scheme + url
                await page.goto(fullUrl, timeout=options.timeout)
                # Give the page time to finish rendering before capturing it.
                await page.waitFor(options.timewait)
                await page.screenshot({
                    'path': '%s/%s.png' % (options.path, _screenshotFileName(url)),
                    'fullPage': options.fullpage,
                })
            except Exception:
                print("[!]Warning: failure of screenshot %s" % url)
                # Start over with a fresh browser so that a broken page or
                # connection cannot affect the remaining addresses.
                try:
                    await page.close()
                    await browser.close()
                except Exception:
                    # Best effort: the old browser may already be gone.
                    pass
                browser = await launch(headless=False)
                page = await browser.newPage()
    finally:
        # Always release the browser process, even on Ctrl-C or cancellation.
        try:
            await browser.close()
        except Exception:
            # Best effort: nothing useful can be done if closing fails.
            pass
# Script entry point: parse the command line, create the output directory and
# run the screenshot coroutine over every address in the target file.
if __name__ == '__main__':
    try:
        options, args = opts.parse_args()
        if not options.filename:
            # print_help() writes the usage text itself and returns None, so
            # wrapping it in print() would add a stray "None" line.
            opts.print_help()
            # SystemExit is raised directly because the exit() helper is
            # injected by the site module and is meant for interactive use.
            raise SystemExit(1)
        # Screenshots are stored in a new directory named after the start time.
        options.path = datetime.now().strftime('%Y.%m.%d %H:%M:%S')
        mkdir(options.path)
        urls = getUrlFromFile(options.filename)
        asyncio.get_event_loop().run_until_complete(getScreenshot(urls, options))
    except KeyboardInterrupt:
        print("[!] Exiting ~")
        raise SystemExit(2)
效果
tl;dr
多动手编写就好了