# Scrape_Gamma_Content.ps1
# Downloads Algolia index content for the sites listed in $domains and
# saves it as JSON files under C:\DB\Gamma\json\<domain>\.
# Builds the HTTP request headers used for the Algolia queries of one site.
#
# The public page https://www.<domain>.com/ is downloaded and the JSON assigned
# to "window.env" in that page is parsed to obtain the Algolia application id
# and API key, which are added to the returned headers.
#
# Parameters:
#   $domain - bare site name; it is inserted into "https://www.<domain>.com/".
#
# Returns:
#   A hashtable of headers. When no API key can be read from the public page,
#   the result of Get-HeaderSubscriber (saved logged-in page) is returned instead.
function Get-HeaderGamma ($domain ){
    $useragent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:79.0) Gecko/20100101 Firefox/79.0"
    # freetour does not work on some sites this needs to become site specific
    [uri]$url = "https://www.$domain.com/"
    #[uri]$url = "https://freetour.$domain.com/"

    # Send Origin/Referer with the scheme, the same form Get-HeaderSubscriber uses.
    $origin = "https://" + $url.Host
    [hashtable]$headers = @{
        "User-Agent"   = $useragent
        "Origin"       = $origin
        "Referer"      = $origin
        "Content-type" = "application/json"
    }

    $keys = $null
    try {
        $page = Invoke-WebRequest -Uri $url -Headers $headers -ErrorAction Stop
        # Match against the response body text; only the first match is used.
        $found = $page.Content |
            Select-String -Pattern "window.env\s+=\s(.+);" |
            Select-Object -First 1
        if ($found) {
            $keys = $found.Matches[0].Groups[1].Value | ConvertFrom-Json
        }
    }
    catch {
        # A failed download or unparsable window.env is not fatal here:
        # the saved-session fallback below is tried instead.
        Write-Warning "Could not read window.env from ${url}: $_"
    }

    $apiKey = $null
    if ($keys) { $apiKey = $keys.api.algolia.apiKey }

    # some sites like blowpass need you to login
    # in that case the next piece of code looks for an html file save after logging in
    if ($null -eq $apiKey) {
        return Get-HeaderSubscriber -domain $domain
    }

    $headers.Add("x-algolia-application-id", $keys.api.algolia.applicationID)
    $headers.Add("x-algolia-api-key", $apiKey)
    return $headers
}
# Builds the HTTP request headers for a site from a locally saved HTML page.
#
# The file C:\DB\Gamma\session\<domain>.html is read and the JSON assigned to
# "window.env" in it is parsed to obtain the Algolia application id and API key.
#
# Parameters:
#   $domain - bare site name; used for the members URL and the saved file name.
#
# Returns:
#   A hashtable of headers. If the saved file is missing or contains no
#   window.env assignment, an error is written and the headers are returned
#   without the two x-algolia-* entries.
function Get-HeaderSubscriber ($domain){
    $useragent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:79.0) Gecko/20100101 Firefox/79.0"
    [uri]$url = "https://members.$domain.com/en"
    $origin = "https://" + $url.Host
    [hashtable]$headers = @{
        "User-Agent"   = $useragent
        "Origin"       = $origin
        "Referer"      = $origin
        "Content-type" = "application/json"
    }

    $domainfile = "C:\DB\Gamma\session\$domain.html"
    if (!(Test-Path -LiteralPath $domainfile -PathType Leaf)) {
        Write-Error "Saved session page not found: $domainfile"
        return $headers
    }

    # The file is scanned line by line; only the first matching line is used.
    $found = Get-Content -LiteralPath $domainfile |
        Select-String -Pattern "window.env\s+=\s(.+);" |
        Select-Object -First 1
    if ($null -eq $found) {
        Write-Error "No window.env assignment found in $domainfile"
        return $headers
    }

    $keys = $found.Matches[0].Groups[1].Value | ConvertFrom-Json
    $headers.Add("x-algolia-application-id", $keys.api.algolia.applicationID)
    $headers.Add("x-algolia-api-key", $keys.api.algolia.apiKey)
    return $headers
}
# Downloads every result page of one Algolia index for a site and saves the
# hits as JSON files.
#
# Parameters:
#   $domain      - bare site name; passed to Get-HeaderGamma and used in the
#                  output folder and file names.
#   $ContentType - one of: scenes, movies, actors, channels, photos.
#
# Files written under C:\DB\Gamma\json\<domain>\ :
#   <domain>_<ContentType>_<page>.json   hits of one result page
#   <domain>_<ContentType>.json          hits of all downloaded pages
#
# Paging stops at the first failed or unusable response, so a page is never
# written from a previous page's data.
function Write-Json2File ($domain, $ContentType) {
    $content_type = @{
        scenes   = 'all_scenes_latest_desc'
        movies   = 'all_movies_latest_desc'
        actors   = 'all_actors'
        channels = 'all_channels'
        photos   = 'all_photosets_latest_desc'
    }
    $indexName = $content_type[$ContentType]
    if ($null -eq $indexName) {
        Write-Error "Unknown content type '$ContentType'. Expected one of: $($content_type.Keys -join ', ')"
        return
    }

    $header = Get-HeaderGamma -domain $domain
    [uri]$url = "https://tsmkfa364q-dsn.algolia.net/1/indexes/*/queries"

    $jsonbase = "C:\DB\Gamma\json\" + $domain + "\"
    # Out-Null keeps the created directory object out of this function's output.
    if (!(Test-Path $jsonbase)) { New-Item -ItemType "directory" -Path $jsonbase | Out-Null }
    #$network = "Devil's Film"

    $contents = New-Object -TypeName System.Collections.ArrayList
    $pagenum = 0
    $nbpages = 1
    $hits = $null
    do {
        $page = $pagenum.ToString()
        $body = "{""requests"": [{""indexName"": ""$indexName"",""params"": ""&hitsPerPage=1000&page=$page""}]}"

        try {
            $response = Invoke-WebRequest -Uri $url -Headers $header -Method POST -Body $body -ErrorAction Stop
            $rjson = $response.Content | ConvertFrom-Json -Depth 48 -AsHashtable
        }
        catch {
            Write-Error "Request for page $page of index '$indexName' failed: $_"
            break
        }

        # One query is sent per request, so only the first result entry is used.
        $result = $null
        if ($rjson -and $rjson['results']) { $result = @($rjson['results'])[0] }
        if ($null -eq $result) {
            Write-Error "Response for page $page of index '$indexName' contains no results entry"
            break
        }

        if ($pagenum -eq 0) {
            $hits = $result['nbHits']
            $nbpages = $result['nbPages']
        }
        if ($hits -eq 0) { break }

        # Always work with an array, even when the page holds a single hit,
        # so AddRange below appends hits rather than one hit's key/value pairs.
        $content = @($result['hits'] | Where-Object { $null -ne $_ })

        # the next 2 statements are not needed
        # the purpose is to create an id for use with Azure CosmosDB and remove redundant data
        # Hits are dictionaries (-AsHashtable), so keys are set and removed directly.
        foreach ($hit in $content) {
            $hit['id'] = $hit['objectID']
            $hit.Remove('_highlightResult')
        }

        $filejson = -join($jsonbase,$domain,"_",$ContentType,"_",$page,".json")
        $content | ConvertTo-Json -Depth 48 | Out-File -FilePath $filejson
        $contents.AddRange($content)
        $pagenum++
    } until ($pagenum -ge $nbpages)

    $filejson = -join($jsonbase,$domain,"_",$ContentType,".json")
    $contents | ConvertTo-Json -Depth 48 | Out-File -FilePath $filejson
}
# Content types to download; each one is passed to Write-Json2File as -ContentType.
$contentlist = @('scenes', 'movies', 'actors', 'photos', 'channels')

# Site names that can be selected by index below.
$domains = @(
    'asgmax'
    'adulttime'
    'girlfriendsfilms'
    'biphoria'
    'genderxfilms'
    'wicked'
    'ragingstallion'
    'falconstudios'
)

# Index 6 selects 'ragingstallion'.
$domain = $domains[6]
foreach ($content in $contentlist) {
    Write-Json2File -domain $domain -ContentType $content
}
# The commented-out commands below are for manual, one-off runs.
# Uncomment a group and change the domain and content type to suit.
# $domain = $domains[1]
# Foreach ($content in $contentlist){
# Write-Json2File -domain $domain -ContentType $content
# }
# $domain = "dfxtra"
# $ContentType = "channels"
# Write-Json2File -domain $domain -ContentType $ContentType
# $domain = "addicted2girls"
# $ContentType = "actors"
# Write-Json2File -domain $domain -ContentType $ContentType
# $domain = "addicted2girls"
# $ContentType = "movies"
# Write-Json2File -domain $domain -ContentType $ContentType
# $header = Get-HeaderSubscriber -domain "blowpass"
# $domain = "xempire"
# $ContentType = "actors"
# Write-Json2File -domain $domain -ContentType $ContentType
# Get-HeaderGamma -domain "addicted2girls"
# NOTE(review): this repeats the earlier top-level run in this file with the
# same content types, the same site list and the same index, so every content
# type is downloaded a second time for the same site - confirm this is intended.
$contentlist = 'scenes', 'movies', 'actors', 'photos', 'channels'
$domains = 'asgmax', 'adulttime', 'girlfriendsfilms',
           'biphoria', 'genderxfilms', 'wicked',
           'ragingstallion', 'falconstudios'

# Index 6 selects 'ragingstallion'.
$domain = $domains[6]
foreach ($content in $contentlist) { Write-Json2File -domain $domain -ContentType $content }