我是第一次做网页抓取。我需要从HTML页面的 <script> 标签里的两个JavaScript变量中提取字典。以下是我获取HTML的代码:
url = "https://www.backstabbr.com/game/Nexus-Season6-Game37/5466300639084544#"
page = requests.get(url).text
soup = BeautifulSoup(page, 'html.parser')
聚焦到我实际需要的那部分HTML,内容如下:
<script>
// NEW JAVSCRIPT!;
var stage = "NEEDS_ORDERS";
var orders = {};
var unitsByPlayer = {"Austria": {"Ven": "F"}, "England": {"BAL": "F", "Den": "A", "HEL": "F", "IRI": "F", "Lon": "F", "NAO": "F", "NTH": "F", "Stp": "A", "War": "A"}, "France": {"Ber": "A", "Bre": "F", "Bur": "A", "ENG": "F", "Hol": "F", "Kie": "A", "MAO": "F", "Naf": "A", "Ruh": "A", "Tyr": "A"}, "Italy": {"Mar": "F", "Tus": "A", "TYS": "F"}, "Turkey": {"Ank": "A", "Bud": "A", "Con": "F", "Gal": "A", "ION": "F", "LYO": "F", "Mos": "A", "Ser": "A", "Smy": "F", "Tri": "A", "Ukr": "A"}};
var territories = {"Lon": "England", "Lvp": "England", "Edi": "England", "Tri": "Turkey", "Bud": "Turkey", "Vie": "England", "Con": "Turkey", "Ank": "Turkey", "Smy": "Turkey", "Rom": "Italy", "Nap": "Italy", "Ven": "Austria", "Par": "France", "Mar": "Italy", "Bre": "France", "Sev": "Turkey", "Stp": "England", "Mos": "Turkey", "War": "England", "Ber": "France", "Mun": "France", "Kie": "France", "Den": "England", "Nwy": "England", "Bul": "Turkey", "Tun": "France", "Spa": "France", "Por": "France", "Rum": "Turkey", "Hol": "France", "Swe": "England", "Ser": "Turkey", "Gre": "Turkey", "Bel": "France"};
var activePlayer = null;
var unitChangeCount = {};
var buildableTerritories = [];
var unbuildableTerritories = [];
var retreatOptions = {};
var playerRetreatOrders = {}; // not sure this is used
var disable_engine = true;
var base_url = '/game/Nexus-Season6-Game37/5466300639084544';
var session_id = '';
var want_shaded_territories = none;
var gameType = 'game';
var nextAdjudicationTime = '2021-05-12 20:26:20.762615+00:00';
var gapi_p1 = 'MjAyMS0wNS0xMSAyMDoyNjoyMC43NjMwMDQ=';
</script>
我需要提取变量 unitsByPlayer 和 territories 中的字典。有人知道如何直接使用变量 soup 来实现吗?提前非常感谢!
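您可以使用 `re` 和 `json` 模块来解析数据:先用正则表达式(例如 `var unitsByPlayer = (\{.*?\});`)从页面文本中捕获对象字面量,再用 `json.loads` 将其转换为字典;`territories` 同理。输出: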
相关问题 更多 >
编程相关推荐