Skip to content
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ script prior to upgrading to minimize the downtime.
- In the docker container the folder /home/clowder/data is now whitelisted by default for uploading by reference.
This can be changed using the environment variable CLOWDER_SOURCEPATH.
- The current CLA for developers of clowder.
- sitemap.xml route to list dataset pages so they can be crawled for their embedded JSON-LD, for Google Dataset Search

### Fixed
- Send email to all admins in a single email when a user submits 'Request access' for a space
Expand Down
46 changes: 46 additions & 0 deletions app/controllers/Application.scala
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import models.{Event, UUID, UserStatus}
import play.api.Play.current
import play.api.mvc.Action
import play.api.{Logger, Play, Routes}
import play.api.libs.json._
import services._
import util.Formatters.sanitizeHTML

Expand Down Expand Up @@ -84,6 +85,51 @@ class Application @Inject()(files: FileService, collections: CollectionService,
}
}


/**
* Returns a sitemap (sitemaps.org XML) listing the dataset pages, so that crawlers
* can reach them and scrape their embedded JSON-LD scripts.
* Modeled on the swagger route: it expects a /public/sitemap.xml resource to exist.
* The sitemap is not cached yet, so that resource is only a placeholder file for now;
* if caching is never added, the resource lookup should be removed.
*/
def sitemap = Action { implicit request =>
  // The classpath resource is only a placeholder today (reserved for caching the
  // generated sitemap later); its contents are not read, but the route answers
  // 404 when it is missing, exactly as before.
  Play.resource("/public/sitemap.xml") match {
    case Some(_) =>
      // Round-trip through java.net.URL so a malformed base URL fails fast.
      val baseUrl = new URL(Utils.baseUrl(request)).toString

      // Blocking call to this instance's own dataset listing API. Always close
      // the Source so the underlying connection/stream is not leaked.
      val source = scala.io.Source.fromURL(baseUrl + "/api/datasets", "UTF-8")
      val listing = try source.mkString finally source.close()

      // `\\` collects every "id" field in the response; values that are not
      // JSON strings are skipped instead of aborting the whole sitemap.
      // An empty listing simply yields an empty <urlset>.
      val datasetIds = (Json.parse(listing) \\ "id").flatMap(_.asOpt[String])

      val header = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" +
        "<urlset xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\">"
      val entries = datasetIds.map { id =>
        "\n<url><loc>" + baseUrl + "/datasets/" + id + "</loc></url>"
      }

      // Serve with an XML content type so crawlers treat the body as a sitemap.
      Ok(header + entries.mkString + "\n</urlset>").as("application/xml; charset=utf-8")

    case None => NotFound("Could not find sitemap.xml")
  }
}


/**
* Main page.
*/
Expand Down
6 changes: 6 additions & 0 deletions conf/routes
Original file line number Diff line number Diff line change
Expand Up @@ -297,6 +297,12 @@ GET /javascriptRoutes
# ----------------------------------------------------------------------
GET /swagger @controllers.Application.swagger
GET /swaggerUI @controllers.Application.swaggerUI

# ----------------------------------------------------------------------
# SITEMAP
# ----------------------------------------------------------------------
GET /sitemap.xml @controllers.Application.sitemap
GET /sitemap @controllers.Application.sitemap

# ----------------------------------------------------------------------
# RESTful API
Expand Down
4 changes: 4 additions & 0 deletions conf/sitemap.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
Placeholder for now:
the route is set up to read from this cached file, so it expects the file to exist
even though the caching has not been implemented yet;
for now the sitemap is generated and returned directly.