You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@solr.apache.org by ge...@apache.org on 2023/11/28 20:22:02 UTC
(solr) branch branch_9x updated: SOLR-17078: Upgrade ltr script to use Python3 (#2077)
This is an automated email from the ASF dual-hosted git repository.
gerlowskija pushed a commit to branch branch_9x
in repository https://gitbox.apache.org/repos/asf/solr.git
The following commit(s) were added to refs/heads/branch_9x by this push:
new 516b4795638 SOLR-17078: Upgrade ltr script to use Python3 (#2077)
516b4795638 is described below
commit 516b4795638867ca06ae83fcb1506799803b6168
Author: Jason Gerlowski <ge...@apache.org>
AuthorDate: Tue Nov 28 15:01:57 2023 -0500
SOLR-17078: Upgrade ltr script to use Python3 (#2077)
Python2 reached its final EOL in 2020. This script should use Python3
like all others in the repo.
---
solr/CHANGES.txt | 2 +
solr/modules/ltr/example/README.md | 2 +-
.../ltr/example/train_and_upload_demo_model.py | 78 +++++++++++-----------
3 files changed, 42 insertions(+), 40 deletions(-)
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index b9c8c3a597d..091ecf0ef25 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -72,6 +72,8 @@ Other Changes
* SOLR-17072: package CLI tool prints error JSONPath (Mikhail Khludnev)
+* SOLR-17078: The `train_and_upload_demo_model.py` script referenced in LTR documentation now uses Python3 (Jason Gerlowski)
+
================== 9.4.0 ==================
New Features
---------------------
diff --git a/solr/modules/ltr/example/README.md b/solr/modules/ltr/example/README.md
index 7da1a7c8765..7cd66484f2f 100644
--- a/solr/modules/ltr/example/README.md
+++ b/solr/modules/ltr/example/README.md
@@ -37,7 +37,7 @@ Please refer to the Solr Reference Guide's section on [Learning To Rank](https:/
`cd modules/ltr/example`
- `python train_and_upload_demo_model.py -c config.json`
+ `python3 train_and_upload_demo_model.py -c config.json`
This script deploys your features from `config.json` "solrFeaturesFile" to Solr. Then it takes the relevance judged query
document pairs of "userQueriesFile" and merges it with the features extracted from Solr into a training
diff --git a/solr/modules/ltr/example/train_and_upload_demo_model.py b/solr/modules/ltr/example/train_and_upload_demo_model.py
index 38624566a2b..3258f82869a 100755
--- a/solr/modules/ltr/example/train_and_upload_demo_model.py
+++ b/solr/modules/ltr/example/train_and_upload_demo_model.py
@@ -1,9 +1,9 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
import sys
import json
-import httplib
-import urllib
+import http.client
+import urllib.request, urllib.parse, urllib.error
import libsvm_formatter
from optparse import OptionParser
@@ -14,7 +14,7 @@ solrQueryUrl = ""
def setupSolr(collection, host, port, featuresFile, featureStoreName):
'''Sets up solr with the proper features for the test'''
- conn = httplib.HTTPConnection(host, port)
+ conn = http.client.HTTPConnection(host, port)
baseUrl = "/solr/" + collection
featureUrl = baseUrl + "/schema/feature-store"
@@ -22,10 +22,10 @@ def setupSolr(collection, host, port, featuresFile, featureStoreName):
conn.request("DELETE", featureUrl+"/"+featureStoreName)
r = conn.getresponse()
msg = r.read()
- if (r.status != httplib.OK and
- r.status != httplib.CREATED and
- r.status != httplib.ACCEPTED and
- r.status != httplib.NOT_FOUND):
+ if (r.status != http.client.OK and
+ r.status != http.client.CREATED and
+ r.status != http.client.ACCEPTED and
+ r.status != http.client.NOT_FOUND):
raise Exception("Status: {0} {1}\nResponse: {2}".format(r.status, r.reason, msg))
@@ -36,11 +36,11 @@ def setupSolr(collection, host, port, featuresFile, featureStoreName):
conn.request("POST", featureUrl, featuresBody, headers)
r = conn.getresponse()
msg = r.read()
- if (r.status != httplib.OK and
- r.status != httplib.ACCEPTED):
- print r.status
- print ""
- print r.reason;
+ if (r.status != http.client.OK and
+ r.status != http.client.ACCEPTED):
+ print(r.status)
+ print("")
+ print(r.reason);
raise Exception("Status: {0} {1}\nResponse: {2}".format(r.status, r.reason, msg))
conn.close()
@@ -66,12 +66,12 @@ def generateHttpRequest(collection, requestHandler, solrFeatureStoreName, efiPar
solrQueryUrl += ("?fl=" + ",".join([ "id", "score", "[features store="+solrFeatureStoreName+" "+efiParams+"]" ]))
solrQueryUrl += "&q="
solrQueryUrl = solrQueryUrl.replace(" ","+")
- solrQueryUrl += urllib.quote_plus("id:")
+ solrQueryUrl += urllib.parse.quote_plus("id:")
- userQuery = urllib.quote_plus(searchText.strip().replace("'","\\'").replace("/","\\\\/"))
- solrQuery = solrQueryUrl + '"' + urllib.quote_plus(docId) + '"' #+ solrQueryUrlEnd
- solrQuery = solrQuery.replace("%24USERQUERY", userQuery).replace('$USERQUERY', urllib.quote_plus("\\'" + userQuery + "\\'"))
+ userQuery = urllib.parse.quote_plus(searchText.strip().replace("'","\\'").replace("/","\\\\/"))
+ solrQuery = solrQueryUrl + '"' + urllib.parse.quote_plus(docId) + '"' #+ solrQueryUrlEnd
+ solrQuery = solrQuery.replace("%24USERQUERY", userQuery).replace('$USERQUERY', urllib.parse.quote_plus("\\'" + userQuery + "\\'"))
return solrQuery
@@ -79,7 +79,7 @@ def generateHttpRequest(collection, requestHandler, solrFeatureStoreName, efiPar
def generateTrainingData(solrQueries, host, port):
'''Given a list of solr queries, yields a tuple of query , docId , score , source , feature vector for each query.
Feature Vector is a list of strings of form "key=value"'''
- conn = httplib.HTTPConnection(host, port)
+ conn = http.client.HTTPConnection(host, port)
headers = {"Connection":" keep-alive"}
try:
@@ -94,22 +94,22 @@ def generateTrainingData(solrQueries, host, port):
if not msgDict['response']['docs'][0]["[features]"] == None:
fv = msgDict['response']['docs'][0]["[features]"];
else:
- print "ERROR NULL FV FOR: " + docId;
- print msg
+ print("ERROR NULL FV FOR: " + docId);
+ print(msg)
continue;
else:
- print "ERROR FOR: " + docId;
- print msg
+ print("ERROR FOR: " + docId);
+ print(msg)
continue;
- if r.status == httplib.OK:
+ if r.status == http.client.OK:
#print "http connection was ok for: " + queryUrl
yield(query,docId,score,source,fv.split(","));
else:
raise Exception("Status: {0} {1}\nResponse: {2}".format(r.status, r.reason, msg))
except Exception as e:
- print msg
- print e
+ print(msg)
+ print(e)
conn.close()
@@ -118,23 +118,23 @@ def uploadModel(collection, host, port, modelFile, modelName):
modelUrl = "/solr/" + collection + "/schema/model-store"
headers = {'Content-type': 'application/json'}
with open(modelFile) as modelBody:
- conn = httplib.HTTPConnection(host, port)
+ conn = http.client.HTTPConnection(host, port)
conn.request("DELETE", modelUrl+"/"+modelName)
r = conn.getresponse()
msg = r.read()
- if (r.status != httplib.OK and
- r.status != httplib.CREATED and
- r.status != httplib.ACCEPTED and
- r.status != httplib.NOT_FOUND):
+ if (r.status != http.client.OK and
+ r.status != http.client.CREATED and
+ r.status != http.client.ACCEPTED and
+ r.status != http.client.NOT_FOUND):
raise Exception("Status: {0} {1}\nResponse: {2}".format(r.status, r.reason, msg))
conn.request("POST", modelUrl, modelBody, headers)
r = conn.getresponse()
msg = r.read()
- if (r.status != httplib.OK and
- r.status != httplib.CREATED and
- r.status != httplib.ACCEPTED):
+ if (r.status != http.client.OK and
+ r.status != http.client.CREATED and
+ r.status != http.client.ACCEPTED):
raise Exception("Status: {0} {1}\nResponse: {2}".format(r.status, r.reason, msg))
@@ -155,24 +155,24 @@ def main(argv=None):
with open(options.configFile) as configFile:
config = json.load(configFile)
- print "Uploading features ("+config["solrFeaturesFile"]+") to Solr"
+ print("Uploading features ("+config["solrFeaturesFile"]+") to Solr")
setupSolr(config["collection"], config["host"], config["port"], config["solrFeaturesFile"], config["solrFeatureStoreName"])
- print "Converting user queries ("+config["userQueriesFile"]+") into Solr queries for feature extraction"
+ print("Converting user queries ("+config["userQueriesFile"]+") into Solr queries for feature extraction")
reRankQueries = generateQueries(config["userQueriesFile"], config["collection"], config["requestHandler"], config["solrFeatureStoreName"], config["efiParams"])
- print "Running Solr queries to extract features"
+ print("Running Solr queries to extract features")
fvGenerator = generateTrainingData(reRankQueries, config["host"], config["port"])
formatter = libsvm_formatter.LibSvmFormatter();
formatter.processQueryDocFeatureVector(fvGenerator,config["trainingFile"]);
- print "Training model using '"+config["trainingLibraryLocation"]+" "+config["trainingLibraryOptions"]+"'"
+ print("Training model using '"+config["trainingLibraryLocation"]+" "+config["trainingLibraryOptions"]+"'")
libsvm_formatter.trainLibSvm(config["trainingLibraryLocation"],config["trainingLibraryOptions"],config["trainingFile"],config["trainedModelFile"])
- print "Converting trained model ("+config["trainedModelFile"]+") to solr model ("+config["solrModelFile"]+")"
+ print("Converting trained model ("+config["trainedModelFile"]+") to solr model ("+config["solrModelFile"]+")")
formatter.convertLibSvmModelToLtrModel(config["trainedModelFile"], config["solrModelFile"], config["solrModelName"], config["solrFeatureStoreName"])
- print "Uploading model ("+config["solrModelFile"]+") to Solr"
+ print("Uploading model ("+config["solrModelFile"]+") to Solr")
uploadModel(config["collection"], config["host"], config["port"], config["solrModelFile"], config["solrModelName"])