{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Requests" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## URLs\n", "\n", "```\n", "<protocol>://<domain>/<path>\n", "https://www.ptt.cc/bbs/Gossiping/search?q=thread%3Anhk\n", "```\n", "\n", "- protocol: `https`\n", "- domain: `www.ptt.cc`\n", "- path: `/bbs/Gossiping/search?q=thread%3Anhk`" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Request Params\n", "\n", "`/search?q=thread%3Anhk`\n", "\n", "1. params come after the `?`\n", "1. `<key>=<value>`: `q=thread%3Anhk`\n", "1. multiple params: `<key1>=<value1>&<key2>=<value2>`\n" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "https://www.ptt.cc/bbs/Gossiping/search?q=%E7%B8%BD%E7%B5%B1\n", "\n", "<!DOCTYPE html>\n", "<html>\n", "\t<head>\n", "\t\t<meta charset=\"utf-8\">\n", "\t\t\n", "\n", "<meta name=\"viewport\" content=\"width=device-width, initial-scale=1\">\n", "\n", "<title>總統 - Gossipin\n" ] } ], "source": [ "import requests\n", "\n", "search_term = '總統'\n", "\n", "base_url = 'https://www.ptt.cc/bbs/Gossiping/search'\n", "cookies = {'over18': '1'}\n", "params = {'q': search_term}\n", "\n", "rq = requests.get(base_url, cookies=cookies, params=params)\n", "print(rq.url)\n", "print()\n", "print(rq.text[:150])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Wrappers: [`searchPTT.py`](https://github.com/liao961120/pynote/tree/master/content/searchPTT.py)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "https://www.ptt.cc/bbs/Gossiping/search?q=%E7%B8%BD%E7%B5%B1\n", "\n", "<!DOCTYPE html>\n", "<html>\n", "\t<head>\n", "\t\t<meta charset=\"utf-8\">\n", "\t\t\n", "\n", "<meta name=\"viewport\" content=\"width=device-width, initial-scale=1\">\n", "\n", "<title>總統 - Gossiping - 批踢踢實業坊</title>\n", "\n", "<link rel=\"stylesheet\" type=\"t\n" ]
} ], "source": [ "import searchPTT\n", "\n", "# Default mode: the keyword is sent as-is in the `q` param\n", "rq = searchPTT.search('總統', board='Gossiping')\n", "print(rq.url)\n", "print()\n", "print(rq.text[:200])" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "https://www.ptt.cc/bbs/Gossiping/search?q=author%3AEbinaHina\n", "\n", "<!DOCTYPE html>\n", "<html>\n", "\t<head>\n", "\t\t<meta charset=\"utf-8\">\n", "\t\t\n", "\n", "<meta name=\"viewport\" content=\"width=device-width, initial-scale=1\">\n", "\n", "<title>author:EbinaHina - Gossiping - 批踢踢實業坊</title>\n", "\n", "<link rel=\"style\n" ] } ], "source": [ "# mode='author' turns the query into `author:<name>`\n", "rq = searchPTT.search('EbinaHina', mode='author', board='Gossiping')\n", "print(rq.url)\n", "print()\n", "print(rq.text[:200])" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# mode='thread' makes the wrapper add the `thread:` prefix itself, so pass the\n", "# bare title; including `thread:` in the keyword yields `q=thread:thread:...`\n", "rq = searchPTT.search('[爆卦] 突擊罷工 長榮協商對話整理', mode='thread', board='Gossiping')\n", "print(rq.url)\n", "print()\n", "print(rq.text[:200])" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.1" } }, "nbformat": 4, "nbformat_minor": 2 }