Elasticsearch - Defining the mapping of Twitter Data

Exploring the dynamic mapping

PUT twitter/_doc/1
{
  "text": "The text of the tweet",
  "created_at": "Thu Jul 31 23:00:09 +0000 2014",
  "other_field": "",
  .....
}
GET /twitter/_mapping
{
  "twitter": {
    "mappings": {
      "_doc": {
        "properties": {
          "coordinates": {
            "properties": {
              "coordinates": {
                "type": "float"
              },
              "type": {
                "type": "text",
                "fields": {
                  "keyword": {
                    "type": "keyword",
                    "ignore_above": 256
                  }
                }
              }
            }
          },
          "created_at": {
            "type": "text",
            "fields": {
              "keyword": {
                "type": "keyword",
                "ignore_above": 256
              }
            }
          },
          "favorite_count": {
            "type": "long"
          },
          "geo": {
            "properties": {
              "coordinates": {
                "type": "float"
              },
              "type": {
                "type": "text",
                "fields": {
                  "keyword": {
                    "type": "keyword",
                    "ignore_above": 256
                  }
                }
              }
            }
          },
          "id": {
            "type": "long"
          },
          "place": {
            "properties": {
              "attributes": {
                "type": "object"
              },
              "bounding_box": {
                "properties": {
                  "coordinates": {
                    "type": "float"
                  },
                  "type": {
                    "type": "text",
                    "fields": {
                      "keyword": {
                        "type": "keyword",
                        "ignore_above": 256
                      }
                    }
                  }
                }
              },
              "country": {
                "type": "text",
                "fields": {
                  "keyword": {
                    "type": "keyword",
                    "ignore_above": 256
                  }
                }
              },
              "full_name": {
                "type": "text",
                "fields": {
                  "keyword": {
                    "type": "keyword",
                    "ignore_above": 256
                  }
                }
              },
              "name": {
                "type": "text",
                "fields": {
                  "keyword": {
                    "type": "keyword",
                    "ignore_above": 256
                  }
                }
              }
            }
          },
          "text": {
            "type": "text",
            "fields": {
              "keyword": {
                "type": "keyword",
                "ignore_above": 256
              }
            }
          },
          "user": {
            "properties": {
              "contributors_enabled": {
                "type": "boolean"
              },
              "created_at": {
                "type": "text",
                "fields": {
                  "keyword": {
                    "type": "keyword",
                    "ignore_above": 256
                  }
                }
              },
              "description": {
                "type": "text",
                "fields": {
                  "keyword": {
                    "type": "keyword",
                    "ignore_above": 256
                  }
                }
              },
              "favourites_count": {
                "type": "long"
              },
              "followers_count": {
                "type": "long"
              },
              "friends_count": {
                "type": "long"
              },
              "geo_enabled": {
                "type": "boolean"
              },
              "listed_count": {
                "type": "long"
              },
              "location": {
                "type": "text",
                "fields": {
                  "keyword": {
                    "type": "keyword",
                    "ignore_above": 256
                  }
                }
              },
              "name": {
                "type": "text",
                "fields": {
                  "keyword": {
                    "type": "keyword",
                    "ignore_above": 256
                  }
                }
              },
              "screen_name": {
                "type": "text",
                "fields": {
                  "keyword": {
                    "type": "keyword",
                    "ignore_above": 256
                  }
                }
              },
              "statuses_count": {
                "type": "long"
              }
            }
          }
        }
      }
    }
  }
}

Defining the mapping

"created_at": "Thu Jul 31 23:00:09 +0000 2014"
"EEE MMM dd HH:mm:ss Z yyyy"
PUT twitter
{
  "mappings": {
    "_doc": {
      "properties": {
        "created_at": {
          "type": "date",
          "format": "EEE MMM dd HH:mm:ss Z yyyy"
        },
        "retweeted_status.created_at": {
          "type": "date",
          "format": "EEE MMM dd HH:mm:ss Z yyyy"
        },
        "user.created_at": {
          "type": "date",
          "format": "EEE MMM dd HH:mm:ss Z yyyy"
        },
        "retweeted_status.user.created_at": {
          "type": "date",
          "format": "EEE MMM dd HH:mm:ss Z yyyy"
        }
      }
    }
  }
}
PUT twitter
{
  "mappings": {
    "_doc": {
      "properties": {
        "coordinates.coordinates": {
          "type": "geo_point"
        }
      }
    }
  }
}
"bounding_box": {
  "type": "Polygon",
  "coordinates": [
    [
      [-4.43193, 55.864109],
      [-4.43193, 55.864109],
      [-4.43193, 55.864109],
      [-4.43193, 55.864109]
    ]
  ]
}
"place.bounding_box": {
  "type": "geo_shape",
  "coerce": true,
  "ignore_malformed": true
}
PUT twitter
{
  "mappings": {
    "_doc": {
      "properties": {
        "created_at": {
          "type": "date",
          "format": "EEE MMM dd HH:mm:ss Z yyyy"
        },
        "retweeted_status.created_at": {
          "type": "date",
          "format": "EEE MMM dd HH:mm:ss Z yyyy"
        },
        "user.created_at": {
          "type": "date",
          "format": "EEE MMM dd HH:mm:ss Z yyyy"
        },
        "retweeted_status.user.created_at": {
          "type": "date",
          "format": "EEE MMM dd HH:mm:ss Z yyyy"
        },
        "coordinates.coordinates": {
          "type": "geo_point"
        },
        "place.bounding_box": {
          "type": "geo_shape",
          "coerce": true,
          "ignore_malformed": true
        }
      }
    }
  }
}

Wrapping up

--

--

Get the Medium app

A button that says 'Download on the App Store', and if clicked it will lead you to the iOS App store
A button that says 'Get it on, Google Play', and if clicked it will lead you to the Google Play store
Christina Boididou

Christina Boididou

60 Followers

Data enthusiast, Machine Learning fan. Doing data work at the BBC