I know similar questions have been asked here, but I'm still struggling to find a solution. I'm able to parse the raw HTML from the bandsintown website using BeautifulSoup, but my ultimate goal is to access the script on the page and read the JSON embedded in that script. Opening the page source, I can see that "eventsJsonLd" is what I need:
"jsonLdContainer":{"eventsJsonLd":[{"#context":"http://schema.org","#type":"MusicEvent","startDate":"2019-01-25","endDate":"2019-01-25","url":"https://www.bandsintown.com/e/100451456-pop-rocks-at-hopmonk-tavern-novato?came_from=244","location":{"#type":"Place","name":"HopMonk Tavern Novato","address":"Novato, CA","geo":{"#type":"GeoCoordinates","latitude":38.1074198,"longitude":-122.5697032}},"name":"Pop Rocks","performer":{"#type":"MusicGroup","name":"Pop Rocks","image":"https://photos.bandsintown.com/thumb/8532836.jpeg","url":"https://www.bandsintown.com/a/29109-pop-rocks?came_from=244"},"image":"https://photos.bandsintown.com/thumb/8532836.jpeg"},
Here's my code:
# Define the base URL and build the list of page URLs to cycle through.
page = 'https://www.bandsintown.com/?came_from=257&page='
urlBucket = []
for i in range(0, 2):
    uniqueUrl = page + str(i)
    urlBucket.append(uniqueUrl)

# Fetch every page and keep the HTTP responses.
responseBucket = []
for i in urlBucket:
    uniqueResponse = requests.get(i)
    responseBucket.append(uniqueResponse)

# Make the 'soup': parse each response body as HTML.
soupBucket = []
for i in responseBucket:
    individualSoup = BeautifulSoup(i.text, 'html.parser')
    soupBucket.append(individualSoup)

# Build a list to hold the <script> tag taken from each page.
allScript = []
for i in soupBucket:
    # Index 4 is hard-coded; in the output shown below this is the
    # <script type="application/ld+json"> tag.
    script = i.find_all("script")[4]
    # NOTE: `script` is a bs4 Tag object, not a string. This is the call that
    # raises the "TypeError: expected string or buffer" reported below.
    eventsJSON = json.loads(script)
    print(script)
    allScript.append(script)
print(allScript)
Printing allScript gives me the following:
[<script type="application/ld+json">[{"#context":"http://schema.org","#type":"MusicEvent","startDate":"2019-01-26","endDate":"2019-01-26","url":"https://www.bandsintown.com/e/100653596-e.r.n.e.s.t.o-at-the-endup?came_from=244","location":{"#type":"Place","name":"The EndUp","address":"SF, CA","geo":{"#type":"GeoCoordinates","latitude":37.7726402,"longitude":-122.4099154}},"name":"E.R.N.E.S.T.O","performer":{"#type":"MusicGroup","name":"E.R.N.E.S.T.O","image":"https://photos.bandsintown.com/thumb/8618862.jpeg","url":"https://www.bandsintown.com/a/4693798-e.r.n.e.s.t.o?came_from=244"},"image":"https://photos.bandsintown.com/thumb/8618862.jpeg"},{"#context":"http://schema.org","#type":"MusicEvent","startDate":"2019-01-26","endDate":"2019-01-26","url":"https://www.bandsintown.com/e/1012239291-j.j.-grey-and-mofro-at-uptown-theatre-napa?came_from=244","location":{"#type":"Place","name":"Uptown Theatre Napa","address":"Napa, CA","geo":{"#type":"GeoCoordinates","latitude":38.2963465,"longitude":-122.2873698}},"name":"J.J. Grey & Mofro","performer":{"#type":"MusicGroup","name":"J.J. Grey & Mofro","image":"https://photos.bandsintown.com/thumb/219177.jpeg","url":"https://www.bandsintown.com/a/2327212-j.j.-grey-and-mofro?came_from=244"},"image":"https://photos.bandsintown.com/thumb/219177.jpeg"},{"#context":"http://schema.org","#type":"MusicEvent","startDate":"2019-01-26","endDate":"2019-01-26","url":"https://www.bandsintown.com/e/1012239613-j.j.-grey-at-uptown-theatre-napa?came_from=244","location":{"#type":"Place","name":"Uptown Theatre Napa","address":"Napa, CA","geo":{"#type":"GeoCoordinates","latitude":38.2963465,"longitude":-122.2873698}},"name":"J.J. Grey","performer":{"#type":"MusicGroup","name":"J.J. 
Grey","image":"","url":"https://www.bandsintown.com/a/12437162-j.j.-grey?came_from=244"},"image":""},{"#context":"http://schema.org","#type":"MusicEvent","startDate":"2019-01-26","endDate":"2019-01-26","url":"https://www.bandsintown.com/e/1012239435-mofro-at-uptown-theatre-napa?came_from=244","location":{"#type":"Place","name":"Uptown Theatre Napa","address":"Napa, CA","geo":{"#type":"GeoCoordinates","latitude":38.2963465,"longitude":-122.2873698}},"name":"Mofro","performer":{"#type":"MusicGroup","name":"Mofro","image":"","url":"https://www.bandsintown.com/a/71714-mofro?came_from=244"},"image":""},{"#context":"http://schema.org","#type":"MusicEvent","startDate":"2019-01-26","endDate":"2019-01-26","url":"https://www.bandsintown.com/e/100542800-brooke-heinichen-at-stuffed?came_from=244","location":{"#type":"Place","name":"Stuffed","address":"San Francisco, CA","geo":{"#type":"GeoCoordinates","latitude":37.7485824,"longitude":-122.4184108}},"name":"Brooke Heinichen","performer":{"#type":"MusicGroup","name":"Brooke Heinichen","image":"https://photos.bandsintown.com/thumb/8921909.jpeg","url":"https://www.bandsintown.com/a/14944274-brooke-heinichen?came_from=244"},"image":"https://photos.bandsintown.com/thumb/8921909.jpeg"},{"#context":"http://schema.org","#type":"MusicEvent","startDate":"2019-01-26","endDate":"2019-01-26","url":"https://www.bandsintown.com/e/1012486121-william-fitzsimmons-at-hopmonk-tavern?came_from=244","location":{"#type":"Place","name":"Hopmonk Tavern","address":"Novato, CA","geo":{"#type":"GeoCoordinates","latitude":38.088489,"longitude":-122.553449}},"name":"William Fitzsimmons","performer":{"#type":"MusicGroup","name":"William 
Fitzsimmons","image":"https://photos.bandsintown.com/thumb/8852940.jpeg","url":"https://www.bandsintown.com/a/2450-william-fitzsimmons?came_from=244"},"image":"https://photos.bandsintown.com/thumb/8852940.jpeg"},{"#context":"http://schema.org","#type":"MusicEvent","startDate":"2019-01-26","endDate":"2019-01-26","url":"https://www.bandsintown.com/e/100581554-kevin-paris-at-acoustic-yoga-#-yoga-source-los-gatos?came_from=244","location":{"#type":"Place","name":"Acoustic Yoga # Yoga Source Los Gatos","address":"Los Gatos, CA","geo":{"#type":"GeoCoordinates","latitude":37.2358078,"longitude":-121.9623751}},"name":"Kevin Paris","performer":{"#type":"MusicGroup","name":"Kevin Paris","image":"https://photos.bandsintown.com/thumb/8419497.jpeg","url":"https://www.bandsintown.com/a/1134314-kevin-paris?came_from=244"},"image":"https://photos.bandsintown.com/thumb/8419497.jpeg"},{"#context":"http://schema.org","#type":"MusicEvent","startDate":"2019-01-26","endDate":"2019-01-26","url":"https://www.bandsintown.com/e/100692435-zak-fennie-at-black-stallion-winery?came_from=244","location":{"#type":"Place","name":"Black Stallion Winery","address":"Napa, CA","geo":{"#type":"GeoCoordinates","latitude":38.35983179999999,"longitude":-122.2906388}},"name":"Zak Fennie","performer":{"#type":"MusicGroup","name":"Zak Fennie","image":"https://photos.bandsintown.com/thumb/8851546.jpeg","url":"https://www.bandsintown.com/a/11843851-zak-fennie?came_from=244"},"image":"https://photos.bandsintown.com/thumb/8851546.jpeg"},{"#context":"http://schema.org","#type":"MusicEvent","startDate":"2019-01-26","endDate":"2019-01-26","url":"https://www.bandsintown.com/e/100621943-frances-ancheta-at-off-the-grid-at-alameda-south-shore-center?came_from=244","location":{"#type":"Place","name":"Off the Grid at Alameda South Shore Center ","address":"Alameda, CA","geo":{"#type":"GeoCoordinates","latitude":37.7712165,"longitude":-122.2824021}},"name":"Frances 
Ancheta","performer":{"#type":"MusicGroup","name":"Frances Ancheta","image":"https://photos.bandsintown.com/thumb/8483059.jpeg","url":"https://www.bandsintown.com/a/7762254-frances-ancheta?came_from=244"},"image":"https://photos.bandsintown.com/thumb/8483059.jpeg"},{"#context":"http://schema.org","#type":"MusicEvent","startDate":"2019-01-26","endDate":"2019-01-26","url":"https://www.bandsintown.com/e/1013412612-pizza!-at-audio-nightclub?came_from=244","location":{"#type":"Place","name":"Audio Nightclub","address":"San Francisco, CA","geo":{"#type":"GeoCoordinates","latitude":37.771362,"longitude":-122.413795}},"name":"Pizza!","performer":{"#type":"MusicGroup","name":"Pizza!","image":"https://photos.bandsintown.com/thumb/161356.jpeg","url":"https://www.bandsintown.com/a/198680-pizza!?came_from=244"},"image":"https://photos.bandsintown.com/thumb/161356.jpeg"},{"#context":"http://schema.org","#type":"MusicEvent","startDate":"2019-01-26","endDate":"2019-01-26","url":"https://www.bandsintown.com/e/100372855-ryan-scott-long-at-drake's-barrel-house?came_from=244","location":{"#type":"Place","name":"Drake\u2019s barrel house ","address":"San Leandro, Ca","geo":{"#type":"GeoCoordinates","latitude":37.7249296,"longitude":-122.1560768}},"name":"Ryan Scott Long","performer":{"#type":"MusicGroup","name":"Ryan Scott Long","image":"https://photos.bandsintown.com/thumb/8671372.jpeg","url":"https://www.bandsintown.com/a/3168705-ryan-scott-long?came_from=244"},"image":"https://photos.bandsintown.com/thumb/8671372.jpeg"},{"#context":"http://schema.org","#type":"MusicEvent","startDate":"2019-01-26","endDate":"2019-01-26","url":"https://www.bandsintown.com/e/1012999412-come-from-away-at-golden-gate-theater?came_from=244","location":{"#type":"Place","name":"Golden Gate Theater","address":"San Francisco, CA","geo":{"#type":"GeoCoordinates","latitude":37.7825715,"longitude":-122.4110742}},"name":"Come From Away","performer":{"#type":"MusicGroup","name":"Come From 
Away","image":"","url":"https://www.bandsintown.com/a/13889714-come-from-away?came_from=244"},"image":""},{"#context":"http://schema.org","#type":"MusicEvent","startDate":"2019-01-26","endDate":"2019-01-26","url":"https://www.bandsintown.com/e/100441096-and-then-came-humans-at-drake's-brewing-company?came_from=244","location":{"#type":"Place","name":"Drake\u2019s Brewing Company","address":"San Leandro, Ca","geo":{"#type":"GeoCoordinates","latitude":37.7249296,"longitude":-122.1560768}},"name":"And Then Came Humans","performer":{"#type":"MusicGroup","name":"And Then Came Humans","image":"https://photos.bandsintown.com/thumb/8897159.jpeg","url":"https://www.bandsintown.com/a/13151463-and-then-came-humans?came_from=244"},"image":"https://photos.bandsintown.com/thumb/8897159.jpeg"},{"#context":"http://schema.org","#type":"MusicEvent","startDate":"2019-01-26","endDate":"2019-01-26","url":"https://www.bandsintown.com/e/1011601412-man-go-at-el-rio?came_from=244","location":{"#type":"Place","name":"El Rio","address":"San Francisco, CA","geo":{"#type":"GeoCoordinates","latitude":37.7467828,"longitude":-122.4193922}},"name":"Man-Go","performer":{"#type":"MusicGroup","name":"Man-Go","image":"","url":"https://www.bandsintown.com/a/3238684-man-go?came_from=244"},"image":""},{"#context":"http://schema.org","#type":"MusicEvent","startDate":"2019-01-26","endDate":"2019-01-26","url":"https://www.bandsintown.com/e/1013320819-paul-mehling-at-freight-and-salvage-coffeehouse?came_from=244","location":{"#type":"Place","name":"Freight & Salvage Coffeehouse","address":"Berkeley, CA","geo":{"#type":"GeoCoordinates","latitude":37.8708715,"longitude":-122.2695117}},"name":"Paul Mehling","performer":{"#type":"MusicGroup","name":"Paul 
Mehling","image":"","url":"https://www.bandsintown.com/a/3307749-paul-mehling?came_from=244"},"image":""},{"#context":"http://schema.org","#type":"MusicEvent","startDate":"2019-01-26","endDate":"2019-01-26","url":"https://www.bandsintown.com/e/100672210-dj-spooky-at-catharine-clark-gallery?came_from=244","location":{"#type":"Place","name":"Catharine Clark Gallery","address":"SF, CA","geo":{"#type":"GeoCoordinates","latitude":37.76639,"longitude":-122.40704}},"name":"DJ Spooky","performer":{"#type":"MusicGroup","name":"DJ Spooky","image":"https://photos.bandsintown.com/thumb/7060233.jpeg","url":"https://www.bandsintown.com/a/64476-dj-spooky?came_from=244"},"image":"https://photos.bandsintown.com/thumb/7060233.jpeg"},{"#context":"http://schema.org","#type":"MusicEvent","startDate":"2019-01-26","endDate":"2019-01-26","url":"https://www.bandsintown.com/e/1012003162-craig-ventresco-at-atlas-cafe?came_from=244","location":{"#type":"Place","name":"Atlas Cafe","address":"San Francisco, CA","geo":{"#type":"GeoCoordinates","latitude":37.73189,"longitude":-122.47615}},"name":"Craig Ventresco","performer":{"#type":"MusicGroup","name":"Craig Ventresco","image":"","url":"https://www.bandsintown.com/a/139634-craig-ventresco?came_from=244"},"image":""},{"#context":"http://schema.org","#type":"MusicEvent","startDate":"2019-01-26","endDate":"2019-01-26","url":"https://www.bandsintown.com/e/100555258-rusty-jackson-music-at-kawika's-ocean-beach-deli?came_from=244","location":{"#type":"Place","name":"Kawika's Ocean Beach Deli","address":"SF, CA","geo":{"#type":"GeoCoordinates","latitude":37.774627,"longitude":-122.509993}},"name":"Rusty Jackson Music","performer":{"#type":"MusicGroup","name":"Rusty Jackson Music","image":"https://photos.bandsintown.com/thumb/8250003.jpeg","url":"https://www.bandsintown.com/a/9978762-rusty-jackson-music?came_from=244"},"image":"https://photos.bandsintown.com/thumb/8250003.jpeg"}]</script>, <script 
type="application/ld+json">[{"#context":"http://schema.org","#type":"MusicEvent","startDate":"2019-01-26","endDate":"2019-01-26","url":"https://www.bandsintown.com/e/100653596-e.r.n.e.s.t.o-at-the-endup?came_from=244","location":{"#type":"Place","name":"The EndUp","address":"SF, CA","geo":{"#type":"GeoCoordinates","latitude":37.7726402,"longitude":-122.4099154}},"name":"E.R.N.E.S.T.O","performer":{"#type":"MusicGroup","name":"E.R.N.E.S.T.O","image":"https://photos.bandsintown.com/thumb/8618862.jpeg","url":"https://www.bandsintown.com/a/4693798-e.r.n.e.s.t.o?came_from=244"},"image":"https://photos.bandsintown.com/thumb/8618862.jpeg"},{"#context":"http://schema.org","#type":"MusicEvent","startDate":"2019-01-26","endDate":"2019-01-26","url":"https://www.bandsintown.com/e/1012239291-j.j.-grey-and-mofro-at-uptown-theatre-napa?came_from=244","location":{"#type":"Place","name":"Uptown Theatre Napa","address":"Napa, CA","geo":{"#type":"GeoCoordinates","latitude":38.2963465,"longitude":-122.2873698}},"name":"J.J. Grey & Mofro","performer":{"#type":"MusicGroup","name":"J.J. Grey & Mofro","image":"https://photos.bandsintown.com/thumb/219177.jpeg","url":"https://www.bandsintown.com/a/2327212-j.j.-grey-and-mofro?came_from=244"},"image":"https://photos.bandsintown.com/thumb/219177.jpeg"},{"#context":"http://schema.org","#type":"MusicEvent","startDate":"2019-01-26","endDate":"2019-01-26","url":"https://www.bandsintown.com/e/1012239613-j.j.-grey-at-uptown-theatre-napa?came_from=244","location":{"#type":"Place","name":"Uptown Theatre Napa","address":"Napa, CA","geo":{"#type":"GeoCoordinates","latitude":38.2963465,"longitude":-122.2873698}},"name":"J.J. Grey","performer":{"#type":"MusicGroup","name":"J.J. 
Grey","image":"","url":"https://www.bandsintown.com/a/12437162-j.j.-grey?came_from=244"},"image":""},{"#context":"http://schema.org","#type":"MusicEvent","startDate":"2019-01-26","endDate":"2019-01-26","url":"https://www.bandsintown.com/e/1012239435-mofro-at-uptown-theatre-napa?came_from=244","location":{"#type":"Place","name":"Uptown Theatre Napa","address":"Napa, CA","geo":{"#type":"GeoCoordinates","latitude":38.2963465,"longitude":-122.2873698}},"name":"Mofro","performer":{"#type":"MusicGroup","name":"Mofro","image":"","url":"https://www.bandsintown.com/a/71714-mofro?came_from=244"},"image":""},{"#context":"http://schema.org","#type":"MusicEvent","startDate":"2019-01-26","endDate":"2019-01-26","url":"https://www.bandsintown.com/e/100542800-brooke-heinichen-at-stuffed?came_from=244","location":{"#type":"Place","name":"Stuffed","address":"San Francisco, CA","geo":{"#type":"GeoCoordinates","latitude":37.7485824,"longitude":-122.4184108}},"name":"Brooke Heinichen","performer":{"#type":"MusicGroup","name":"Brooke Heinichen","image":"https://photos.bandsintown.com/thumb/8921909.jpeg","url":"https://www.bandsintown.com/a/14944274-brooke-heinichen?came_from=244"},"image":"https://photos.bandsintown.com/thumb/8921909.jpeg"},{"#context":"http://schema.org","#type":"MusicEvent","startDate":"2019-01-26","endDate":"2019-01-26","url":"https://www.bandsintown.com/e/1012486121-william-fitzsimmons-at-hopmonk-tavern?came_from=244","location":{"#type":"Place","name":"Hopmonk Tavern","address":"Novato, CA","geo":{"#type":"GeoCoordinates","latitude":38.088489,"longitude":-122.553449}},"name":"William Fitzsimmons","performer":{"#type":"MusicGroup","name":"William 
Fitzsimmons","image":"https://photos.bandsintown.com/thumb/8852940.jpeg","url":"https://www.bandsintown.com/a/2450-william-fitzsimmons?came_from=244"},"image":"https://photos.bandsintown.com/thumb/8852940.jpeg"},{"#context":"http://schema.org","#type":"MusicEvent","startDate":"2019-01-26","endDate":"2019-01-26","url":"https://www.bandsintown.com/e/100581554-kevin-paris-at-acoustic-yoga-#-yoga-source-los-gatos?came_from=244","location":{"#type":"Place","name":"Acoustic Yoga # Yoga Source Los Gatos","address":"Los Gatos, CA","geo":{"#type":"GeoCoordinates","latitude":37.2358078,"longitude":-121.9623751}},"name":"Kevin Paris","performer":{"#type":"MusicGroup","name":"Kevin Paris","image":"https://photos.bandsintown.com/thumb/8419497.jpeg","url":"https://www.bandsintown.com/a/1134314-kevin-paris?came_from=244"},"image":"https://photos.bandsintown.com/thumb/8419497.jpeg"},{"#context":"http://schema.org","#type":"MusicEvent","startDate":"2019-01-26","endDate":"2019-01-26","url":"https://www.bandsintown.com/e/100692435-zak-fennie-at-black-stallion-winery?came_from=244","location":{"#type":"Place","name":"Black Stallion Winery","address":"Napa, CA","geo":{"#type":"GeoCoordinates","latitude":38.35983179999999,"longitude":-122.2906388}},"name":"Zak Fennie","performer":{"#type":"MusicGroup","name":"Zak Fennie","image":"https://photos.bandsintown.com/thumb/8851546.jpeg","url":"https://www.bandsintown.com/a/11843851-zak-fennie?came_from=244"},"image":"https://photos.bandsintown.com/thumb/8851546.jpeg"},{"#context":"http://schema.org","#type":"MusicEvent","startDate":"2019-01-26","endDate":"2019-01-26","url":"https://www.bandsintown.com/e/100621943-frances-ancheta-at-off-the-grid-at-alameda-south-shore-center?came_from=244","location":{"#type":"Place","name":"Off the Grid at Alameda South Shore Center ","address":"Alameda, CA","geo":{"#type":"GeoCoordinates","latitude":37.7712165,"longitude":-122.2824021}},"name":"Frances 
Ancheta","performer":{"#type":"MusicGroup","name":"Frances Ancheta","image":"https://photos.bandsintown.com/thumb/8483059.jpeg","url":"https://www.bandsintown.com/a/7762254-frances-ancheta?came_from=244"},"image":"https://photos.bandsintown.com/thumb/8483059.jpeg"},{"#context":"http://schema.org","#type":"MusicEvent","startDate":"2019-01-26","endDate":"2019-01-26","url":"https://www.bandsintown.com/e/1013412612-pizza!-at-audio-nightclub?came_from=244","location":{"#type":"Place","name":"Audio Nightclub","address":"San Francisco, CA","geo":{"#type":"GeoCoordinates","latitude":37.771362,"longitude":-122.413795}},"name":"Pizza!","performer":{"#type":"MusicGroup","name":"Pizza!","image":"https://photos.bandsintown.com/thumb/161356.jpeg","url":"https://www.bandsintown.com/a/198680-pizza!?came_from=244"},"image":"https://photos.bandsintown.com/thumb/161356.jpeg"},{"#context":"http://schema.org","#type":"MusicEvent","startDate":"2019-01-26","endDate":"2019-01-26","url":"https://www.bandsintown.com/e/100372855-ryan-scott-long-at-drake's-barrel-house?came_from=244","location":{"#type":"Place","name":"Drake\u2019s barrel house ","address":"San Leandro, Ca","geo":{"#type":"GeoCoordinates","latitude":37.7249296,"longitude":-122.1560768}},"name":"Ryan Scott Long","performer":{"#type":"MusicGroup","name":"Ryan Scott Long","image":"https://photos.bandsintown.com/thumb/8671372.jpeg","url":"https://www.bandsintown.com/a/3168705-ryan-scott-long?came_from=244"},"image":"https://photos.bandsintown.com/thumb/8671372.jpeg"},{"#context":"http://schema.org","#type":"MusicEvent","startDate":"2019-01-26","endDate":"2019-01-26","url":"https://www.bandsintown.com/e/1012999412-come-from-away-at-golden-gate-theater?came_from=244","location":{"#type":"Place","name":"Golden Gate Theater","address":"San Francisco, CA","geo":{"#type":"GeoCoordinates","latitude":37.7825715,"longitude":-122.4110742}},"name":"Come From Away","performer":{"#type":"MusicGroup","name":"Come From 
Away","image":"","url":"https://www.bandsintown.com/a/13889714-come-from-away?came_from=244"},"image":""},{"#context":"http://schema.org","#type":"MusicEvent","startDate":"2019-01-26","endDate":"2019-01-26","url":"https://www.bandsintown.com/e/100441096-and-then-came-humans-at-drake's-brewing-company?came_from=244","location":{"#type":"Place","name":"Drake\u2019s Brewing Company","address":"San Leandro, Ca","geo":{"#type":"GeoCoordinates","latitude":37.7249296,"longitude":-122.1560768}},"name":"And Then Came Humans","performer":{"#type":"MusicGroup","name":"And Then Came Humans","image":"https://photos.bandsintown.com/thumb/8897159.jpeg","url":"https://www.bandsintown.com/a/13151463-and-then-came-humans?came_from=244"},"image":"https://photos.bandsintown.com/thumb/8897159.jpeg"},{"#context":"http://schema.org","#type":"MusicEvent","startDate":"2019-01-26","endDate":"2019-01-26","url":"https://www.bandsintown.com/e/1011601412-man-go-at-el-rio?came_from=244","location":{"#type":"Place","name":"El Rio","address":"San Francisco, CA","geo":{"#type":"GeoCoordinates","latitude":37.7467828,"longitude":-122.4193922}},"name":"Man-Go","performer":{"#type":"MusicGroup","name":"Man-Go","image":"","url":"https://www.bandsintown.com/a/3238684-man-go?came_from=244"},"image":""},{"#context":"http://schema.org","#type":"MusicEvent","startDate":"2019-01-26","endDate":"2019-01-26","url":"https://www.bandsintown.com/e/1013320819-paul-mehling-at-freight-and-salvage-coffeehouse?came_from=244","location":{"#type":"Place","name":"Freight & Salvage Coffeehouse","address":"Berkeley, CA","geo":{"#type":"GeoCoordinates","latitude":37.8708715,"longitude":-122.2695117}},"name":"Paul Mehling","performer":{"#type":"MusicGroup","name":"Paul 
Mehling","image":"","url":"https://www.bandsintown.com/a/3307749-paul-mehling?came_from=244"},"image":""},{"#context":"http://schema.org","#type":"MusicEvent","startDate":"2019-01-26","endDate":"2019-01-26","url":"https://www.bandsintown.com/e/100672210-dj-spooky-at-catharine-clark-gallery?came_from=244","location":{"#type":"Place","name":"Catharine Clark Gallery","address":"SF, CA","geo":{"#type":"GeoCoordinates","latitude":37.76639,"longitude":-122.40704}},"name":"DJ Spooky","performer":{"#type":"MusicGroup","name":"DJ Spooky","image":"https://photos.bandsintown.com/thumb/7060233.jpeg","url":"https://www.bandsintown.com/a/64476-dj-spooky?came_from=244"},"image":"https://photos.bandsintown.com/thumb/7060233.jpeg"},{"#context":"http://schema.org","#type":"MusicEvent","startDate":"2019-01-26","endDate":"2019-01-26","url":"https://www.bandsintown.com/e/1012003162-craig-ventresco-at-atlas-cafe?came_from=244","location":{"#type":"Place","name":"Atlas Cafe","address":"San Francisco, CA","geo":{"#type":"GeoCoordinates","latitude":37.73189,"longitude":-122.47615}},"name":"Craig Ventresco","performer":{"#type":"MusicGroup","name":"Craig Ventresco","image":"","url":"https://www.bandsintown.com/a/139634-craig-ventresco?came_from=244"},"image":""},{"#context":"http://schema.org","#type":"MusicEvent","startDate":"2019-01-26","endDate":"2019-01-26","url":"https://www.bandsintown.com/e/100555258-rusty-jackson-music-at-kawika's-ocean-beach-deli?came_from=244","location":{"#type":"Place","name":"Kawika's Ocean Beach Deli","address":"SF, CA","geo":{"#type":"GeoCoordinates","latitude":37.774627,"longitude":-122.509993}},"name":"Rusty Jackson Music","performer":{"#type":"MusicGroup","name":"Rusty Jackson Music","image":"https://photos.bandsintown.com/thumb/8250003.jpeg","url":"https://www.bandsintown.com/a/9978762-rusty-jackson-music?came_from=244"},"image":"https://photos.bandsintown.com/thumb/8250003.jpeg"}]</script>]
But, printing eventsJSON gives me an error:
TypeError: expected string or buffer
I want to be able to build a new JSON based on specific attributes in eventsJsonLd, i.e. "startDate", "name", etc. Can anyone tell me where I'm going wrong? Thanks in advance.
You are passing the script tag itself into json.loads. This is not a string but an object of the bs4.element.Tag class.
script = i.find_all("script")[4]
print(type(script))
Output
<class 'bs4.element.Tag'>
You need to get the text from the tag and pass it to json.loads
eventsJSON = json.loads(script.text)
Note:
For the URL you are currently trying (https://www.bandsintown.com/?came_from=257&page=0), the contents of that script tag are empty. I was able to get output for a different URL on the same domain (https://www.bandsintown.com/a/29109-pop-rocks).
print(eventsJSON[0])
Gave an output
{u'startDate': u'2019-02-15T21:00:00', u'performer': {u'url': u'https://www.bandsintown.com/a/29109-pop-rocks?came_from=244', u'image': u'https://photos.bandsintown.com/thumb/8532836.jpeg', u'#type': u'MusicGroup', u'name': u'Pop Rocks'}, u'name': u'Pop Rocks', u'url': u'https://www.bandsintown.com/e/100544648-pop-rocks-at-the-chapel?came_from=244', u'image': u'https://photos.bandsintown.com/thumb/8532836.jpeg', u'location': {u'address': u'San Francisco, CA', u'geo': {u'latitude': 37.7485824, u'#type': u'GeoCoordinates', u'longitude': -122.4184108}, u'#type': u'Place', u'name': u'The Chapel'}, u'#context': u'http://schema.org', u'#type': u'MusicEvent', u'description': u'Pop Rocks at The Chapel 2019-02-15T21:00:00'}
I've been using the searchTwitter function from the Twitter REST API to retrieve a certain amount of tweets and I've dumped this to a TXT file.
The structure of this TXT file is:
"text" "favorited" "favoriteCount" "replyToSN" "created" "truncated" "replyToSID" "id" "replyToUID" "statusSource" "screenName" "retweetCount" "isRetweet" "retweeted" "longitude" "latitude"
"1" "RT #kobebryant: Last night was the final chapter to an incredible story. I walk away at peace knowing my love for the game & this city will…" FALSE 0 NA 2016-04-14 23:59:59 FALSE NA "720763566027096066" NA "Twitter for iPhone" "JtLONGWAY" 204125 TRUE FALSE NA NA
"2" "RT #kobebryant: Last night was the final chapter to an incredible story. I walk away at peace knowing my love for the game & this city will…" FALSE 0 NA 2016-04-14 23:59:59 FALSE NA "720763566014332928" NA "Twitter for Android" "Mr_Wizrd" 204125 TRUE FALSE NA NA
"3" "RT #MagicJohnson: I got a chance to get to know #kobebryant away from the court at the #Dodgers game! #ThankYouKobe #KB20 https://twitter.com/sVsW…" FALSE 0 NA 2016-04-14 23:59:59 FALSE NA "720763563783110661" NA "Twitter for iPhone" "TynashKobe" 777 TRUE FALSE NA NA
and I would like to have this as a JSON structure, i.e.
{"created_at":"Wed Apr 13 22:06:02 +0000 2016","id":720372500065071104,"id_str":"720372500065071104","text":"RT #STAPLESCenter: This is where #kobebryant will hold is final press conference tonight. #ThankYouKobe https:\/\/t.co\/1rTiq5eAS9","source":"\u003ca href=\"http:\/\/tweetlogix.com\" rel=\"nofollow\"\u003eTweetlogix\u003c\/a\u003e","truncated":false,"in_reply_to_status_id":null,"in_reply_to_status_id_str":null,"in_reply_to_user_id":null,"in_reply_to_user_id_str":null,"in_reply_to_screen_name":null,"user":{"id":149681225,"id_str":"149681225","name":"SP","screen_name":"Mr_LayedBak","location":"West side of Detroit","url":null,"description":"Unfollow me if you're easily offended","protected":false,"verified":false,"followers_count":4326,"friends_count":597,"listed_count":105,"favourites_count":371,"statuses_count":227845,"created_at":"Sat May 29 23:21:29 +0000 2010","utc_offset":-14400,"time_zone":"Eastern Time (US & Canada)","geo_enabled":true,"lang":"en","contributors_enabled":false,"is_translator":false,"profile_background_color":"131516","profile_background_image_url":"http:\/\/pbs.twimg.com\/profile_background_images\/736248613\/7d89d45f16e6c4e508a883aded1aac64.jpeg","profile_background_image_url_https":"https:\/\/pbs.twimg.com\/profile_background_images\/736248613\/7d89d45f16e6c4e508a883aded1aac64.jpeg","profile_background_tile":true,"profile_link_color":"141313","profile_sidebar_border_color":"000000","profile_sidebar_fill_color":"000000","profile_text_color":"660A0A","profile_use_background_image":true,"profile_image_url":"http:\/\/pbs.twimg.com\/profile_images\/719706881736974341\/XT8R51s8_normal.jpg","profile_image_url_https":"https:\/\/pbs.twimg.com\/profile_images\/719706881736974341\/XT8R51s8_normal.jpg","profile_banner_url":"https:\/\/pbs.twimg.com\/profile_banners\/149681225\/1452265608","default_profile":false,"default_profile_image":false,"following":null,"follow_request_sent":null,"notifications":null},"geo":null,"coordinates":null,"place":null
,"contributors":null,"retweeted_status":{"created_at":"Wed Apr 13 21:37:36 +0000 2016","id":720365343500144640,"id_str":"720365343500144640","text":"This is where #kobebryant will hold is final press conference tonight. #ThankYouKobe https:\/\/t.co\/1rTiq5eAS9","source":"\u003ca href=\"http:\/\/twitter.com\/download\/iphone\" rel=\"nofollow\"\u003eTwitter for iPhone\u003c\/a\u003e","truncated":false,"in_reply_to_status_id":null,"in_reply_to_status_id_str":null,"in_reply_to_user_id":null,"in_reply_to_user_id_str":null,"in_reply_to_screen_name":null,"user":{"id":28725783,"id_str":"28725783","name":"STAPLES Center","screen_name":"STAPLESCenter","location":"Los Angeles","url":"http:\/\/www.staplescenter.com","description":"Sports and Entertainment Center of the World located in downtown Los Angeles #LALIVE since 1999. Instagram: #staplescenterla","protected":false,"verified":true,"followers_count":82891,"friends_count":10907,"listed_count":862,"favourites_count":1905,"statuses_count":11024,"created_at":"Sat Apr 04 03:04:17 +0000 2009","utc_offset":-25200,"time_zone":"Pacific Time (US & 
Canada)","geo_enabled":true,"lang":"en","contributors_enabled":false,"is_translator":false,"profile_background_color":"131516","profile_background_image_url":"http:\/\/pbs.twimg.com\/profile_background_images\/553367185700036609\/q6Kh8Ru8.jpeg","profile_background_image_url_https":"https:\/\/pbs.twimg.com\/profile_background_images\/553367185700036609\/q6Kh8Ru8.jpeg","profile_background_tile":true,"profile_link_color":"009999","profile_sidebar_border_color":"FFFFFF","profile_sidebar_fill_color":"EFEFEF","profile_text_color":"333333","profile_use_background_image":true,"profile_image_url":"http:\/\/pbs.twimg.com\/profile_images\/2394735481\/7rom2fzqu1vwrq94yzll_normal.jpeg","profile_image_url_https":"https:\/\/pbs.twimg.com\/profile_images\/2394735481\/7rom2fzqu1vwrq94yzll_normal.jpeg","profile_banner_url":"https:\/\/pbs.twimg.com\/profile_banners\/28725783\/1416251684","default_profile":false,"default_profile_image":false,"following":null,"follow_request_sent":null,"notifications":null},"geo":null,"coordinates":null,"place":null,"contributors":null,"is_quote_status":false,"retweet_count":264,"favorite_count":439,"entities":{"hashtags":[{"text":"ThankYouKobe","indices":[71,84]}],"urls":[],"user_mentions":[{"screen_name":"kobebryant","name":"Kobe 
Bryant","id":1059194370,"id_str":"1059194370","indices":[14,25]}],"symbols":[],"media":[{"id":720365333593260032,"id_str":"720365333593260032","indices":[85,108],"media_url":"http:\/\/pbs.twimg.com\/media\/Cf9AdElVAAA7BqM.jpg","media_url_https":"https:\/\/pbs.twimg.com\/media\/Cf9AdElVAAA7BqM.jpg","url":"https:\/\/t.co\/1rTiq5eAS9","display_url":"pic.twitter.com\/1rTiq5eAS9","expanded_url":"http:\/\/twitter.com\/STAPLESCenter\/status\/720365343500144640\/photo\/1","type":"photo","sizes":{"small":{"w":340,"h":425,"resize":"fit"},"thumb":{"w":150,"h":150,"resize":"crop"},"large":{"w":1024,"h":1280,"resize":"fit"},"medium":{"w":600,"h":750,"resize":"fit"}}}]},"extended_entities":{"media":[{"id":720365333593260032,"id_str":"720365333593260032","indices":[85,108],"media_url":"http:\/\/pbs.twimg.com\/media\/Cf9AdElVAAA7BqM.jpg","media_url_https":"https:\/\/pbs.twimg.com\/media\/Cf9AdElVAAA7BqM.jpg","url":"https:\/\/t.co\/1rTiq5eAS9","display_url":"pic.twitter.com\/1rTiq5eAS9","expanded_url":"http:\/\/twitter.com\/STAPLESCenter\/status\/720365343500144640\/photo\/1","type":"photo","sizes":{"small":{"w":340,"h":425,"resize":"fit"},"thumb":{"w":150,"h":150,"resize":"crop"},"large":{"w":1024,"h":1280,"resize":"fit"},"medium":{"w":600,"h":750,"resize":"fit"}}}]},"favorited":false,"retweeted":false,"possibly_sensitive":false,"filter_level":"low","lang":"en"},"is_quote_status":false,"retweet_count":0,"favorite_count":0,"entities":{"hashtags":[{"text":"ThankYouKobe","indices":[90,103]}],"urls":[],"user_mentions":[{"screen_name":"STAPLESCenter","name":"STAPLES Center","id":28725783,"id_str":"28725783","indices":[3,17]},{"screen_name":"kobebryant","name":"Kobe 
Bryant","id":1059194370,"id_str":"1059194370","indices":[33,44]}],"symbols":[],"media":[{"id":720365333593260032,"id_str":"720365333593260032","indices":[104,127],"media_url":"http:\/\/pbs.twimg.com\/media\/Cf9AdElVAAA7BqM.jpg","media_url_https":"https:\/\/pbs.twimg.com\/media\/Cf9AdElVAAA7BqM.jpg","url":"https:\/\/t.co\/1rTiq5eAS9","display_url":"pic.twitter.com\/1rTiq5eAS9","expanded_url":"http:\/\/twitter.com\/STAPLESCenter\/status\/720365343500144640\/photo\/1","type":"photo","sizes":{"small":{"w":340,"h":425,"resize":"fit"},"thumb":{"w":150,"h":150,"resize":"crop"},"large":{"w":1024,"h":1280,"resize":"fit"},"medium":{"w":600,"h":750,"resize":"fit"}},"source_status_id":720365343500144640,"source_status_id_str":"720365343500144640","source_user_id":28725783,"source_user_id_str":"28725783"}]},"extended_entities":{"media":[{"id":720365333593260032,"id_str":"720365333593260032","indices":[104,127],"media_url":"http:\/\/pbs.twimg.com\/media\/Cf9AdElVAAA7BqM.jpg","media_url_https":"https:\/\/pbs.twimg.com\/media\/Cf9AdElVAAA7BqM.jpg","url":"https:\/\/twitter.com\/1rTiq5eAS9","display_url":"pic.twitter.com\/1rTiq5eAS9","expanded_url":"http:\/\/twitter.com\/STAPLESCenter\/status\/720365343500144640\/photo\/1","type":"photo","sizes":{"small":{"w":340,"h":425,"resize":"fit"},"thumb":{"w":150,"h":150,"resize":"crop"},"large":{"w":1024,"h":1280,"resize":"fit"},"medium":{"w":600,"h":750,"resize":"fit"}},"source_status_id":720365343500144640,"source_status_id_str":"720365343500144640","source_user_id":28725783,"source_user_id_str":"28725783"}]},"favorited":false,"retweeted":false,"possibly_sensitive":false,"filter_level":"low","lang":"en","timestamp_ms":"1460585162546"}
I've been trying to load the TXT file with read.csv(file, header = TRUE, sep =""). The first problem I've found is that, because the header in the TXT file uses white space as its separator, I get an error saying that there are more columns in the rows than in the header (which is to be expected, since I'm also trying to process the text of the tweets, and that text itself contains white space).
If I don't specify the separator (i.e. read.csv(file)) and I dump the content in a dataframe, then I only get 1 column.
Any hint?
You could do something like
txt <- readLines("myfile.txt")
df <- read.table(text=sub("\\d+-\\d+-\\d+ \\d+:\\d+:\\d+", '"\\1"', txt), header=T)
library(jsonlite)
toJSON(df)
# [{"text":"RT #kobebryant: Last night was the final chapter to an incredible story. I walk ...
Problems arise because the datetime column created is not wrapped in quotes. Thus, the date and the time get separated, and the number of header fields no longer matches the number of data fields. (This simple approach may break if, for example, there are similar patterns in the text column.)
I need a little help. In our class we've been playing around with the GREP and SED commands in an attempt to learn how they work. More specifically, we've been using sed commands to manipulate text and add tags.
So, we were given an assignment: we've been given 500 lines of fake CSV data, and it is our job to create a sed command that will automatically tag the data and also tag any new data added down the road (theoretically).
Here are a few lines of our fake UN-TAGGED data, exactly as we received it. As you can see, every record starts with a first name and ends with a web address:
FirstName,LastName,Company,Address,City,County,State,ZIP,Phone,Fax,Email,Web
"Essie","Vaill","Litronic Industries","14225 Hancock Dr","Anchorage","Anchorage","AK","99515","907-345-0962","907-345-1215","essie#vaill.com","http://www.essievaill.com"
"Cruz","Roudabush","Meridian Products","2202 S Central Ave","Phoenix","Maricopa","AZ","85004","602-252-4827","602-252-4009","cruz#roudabush.com","http://www.cruzroudabush.com"
"Billie","Tinnes","D & M Plywood Inc","28 W 27th St","New York","New York","NY","10001","212-889-5775","212-889-5764","billie#tinnes.com","http://www.billietinnes.com"
"Zackary","Mockus","Metropolitan Elevator Co","286 State St","Perth Amboy","Middlesex","NJ","08861","732-442-0638","732-442-5218","zackary#mockus.com","http://www.zackarymockus.com"
"Rosemarie","Fifield","Technology Services","3131 N Nimitz Hwy #-105","Honolulu","Honolulu","HI","96819","808-836-8966","808-836-6008","rosemarie#fifield.com","http://www.rosemariefifield.com"
"Bernard","Laboy","Century 21 Keewaydin Prop","22661 S Frontage Rd","Channahon","Will","IL","60410","815-467-0487","815-467-1244","bernard#laboy.com","http://www.bernardlaboy.com"
"Sue","Haakinson","Kim Peacock Beringhause","9617 N Metro Pky W","Phoenix","Maricopa","AZ","85051","602-953-2753","602-953-0355","sue#haakinson.com","http://www.suehaakinson.com"
"Valerie","Pou","Sea Port Record One Stop Inc","7475 Hamilton Blvd","Trexlertown","Lehigh","PA","18087","610-395-8743","610-395-6995","valerie#pou.com","http://www.valeriepou.com"
"Lashawn","Hasty","Kpff Consulting Engineers","815 S Glendora Ave","West Covina","Los Angeles","CA","91790","626-960-6738","626-960-1503","lashawn#hasty.com","http://www.lashawnhasty.com"
"Marianne","Earman","Albers Technologies Corp","6220 S Orange Blossom Trl","Orlando","Orange","FL","32809","407-857-0431","407-857-2506","marianne#earman.com","http://www.marianneearman.com"
"Justina","Dragaj","Uchner, David D Esq","2552 Poplar Ave","Memphis","Shelby","TN","38112","901-327-5336","901-327-2911","justina#dragaj.com","http://www.justinadragaj.com"
"Mandy","Mcdonnell","Southern Vermont Surveys","343 Bush St Se","Salem","Marion","OR","97302","503-371-8219","503-371-1118","mandy#mcdonnell.com","http://www.mandymcdonnell.com"
"Conrad","Lanfear","Kahler, Karen T Esq","49 Roche Way","Youngstown","Mahoning","OH","44512","330-758-0314","330-758-3536","conrad#lanfear.com","http://www.conradlanfear.com"
"Cyril","Behen","National Paper & Envelope Corp","1650 S Harbor Blvd","Anaheim","Orange","CA","92802","714-772-5050","714-772-3859","cyril#behen.com","http://www.cyrilbehen.com"
"Shelley","Groden","Norton, Robert L Esq","110 Broadway St","San Antonio","Bexar","TX","78205","210-229-3017","210-229-9757","shelley#groden.com","http://www.shelleygroden.com"
Our teacher wanted us to create sed commands that would automatically indent the data, add TR to the front and back of the data and add TD tags to each new field.
<HTML>
<HEAD><Title>Lab 4b by Andrey</Title></HEAD>
<BODY>
<table border="1">
<TR><TD>FirstName</TD><TD>LastName</TD><TD>Company</TD><TD>Address</TD><TD>City</TD><TD>County</TD><TD>State</TD><TD>ZIP</TD><TD>Phone</TD><TD>Fax</TD><TD>Email</TD><TD>Web</TD></TR>
<TR><TD>Essie</TD><TD>Vaill</TD><TD>Litronic Industries</TD><TD>14225 Hancock Dr</TD><TD>Anchorage</TD><TD>Anchorage</TD><TD>AK</TD><TD>99515</TD><TD>907-345-0962</TD><TD>907-345-1215</TD><TD>essie#vaill.com</TD><TD>http://www.essievaill.com</TD><TR>
<TR><TD>Cruz</TD><TD>Roudabush</TD><TD>Meridian Products</TD><TD>2202 S Central Ave</TD><TD>Phoenix</TD><TD>Maricopa</TD><TD>AZ</TD><TD>85004</TD><TD>602-252-4827</TD><TD>602-252-4009</TD><TD>cruz#roudabush.com</TD><TD>http://www.cruzroudabush.com</TD><TR>
<TR><TD>Billie</TD><TD>Tinnes</TD><TD>D & M Plywood Inc</TD><TD>28 W 27th St</TD><TD>New York</TD><TD>New York</TD><TD>NY</TD><TD>10001</TD><TD>212-889-5775</TD><TD>212-889-5764</TD><TD>billie#tinnes.com</TD><TD>http://www.billietinnes.com</TD><TR>
<TR><TD>Zackary</TD><TD>Mockus</TD><TD>Metropolitan Elevator Co</TD><TD>286 State St</TD><TD>Perth Amboy</TD><TD>Middlesex</TD><TD>NJ</TD><TD>08861</TD><TD>732-442-0638</TD><TD>732-442-5218</TD><TD>zackary#mockus.com</TD><TD>http://www.zackarymockus.com</TD><TR>
<TR><TD>Rosemarie</TD><TD>Fifield</TD><TD>Technology Services</TD><TD>3131 N Nimitz Hwy #-105</TD><TD>Honolulu</TD><TD>Honolulu</TD><TD>HI</TD><TD>96819</TD><TD>808-836-8966</TD><TD>808-836-6008</TD><TD>rosemarie#fifield.com</TD><TD>http://www.rosemariefifield.com<$
<TR><TD>Bernard</TD><TD>Laboy</TD><TD>Century 21 Keewaydin Prop</TD><TD>22661 S Frontage Rd</TD><TD>Channahon</TD><TD>Will</TD><TD>IL</TD><TD>60410</TD><TD>815-467-0487</TD><TD>815-467-1244</TD><TD>bernard#laboy.com</TD><TD>http://www.bernardlaboy.com</TD><TR>
<TR><TD>Sue</TD><TD>Haakinson</TD><TD>Kim Peacock Beringhause</TD><TD>9617 N Metro Pky W</TD><TD>Phoenix</TD><TD>Maricopa</TD><TD>AZ</TD><TD>85051</TD><TD>602-953-2753</TD><TD>602-953-0355</TD><TD>sue#haakinson.com</TD><TD>http://www.suehaakinson.com</TD><TR>
<TR><TD>Valerie</TD><TD>Pou</TD><TD>Sea Port Record One Stop Inc</TD><TD>7475 Hamilton Blvd</TD><TD>Trexlertown</TD><TD>Lehigh</TD><TD>PA</TD><TD>18087</TD><TD>610-395-8743</TD><TD>610-395-6995</TD><TD>valerie#pou.com</TD><TD>http://www.valeriepou.com</TD><TR>
<TR><TD>Lashawn</TD><TD>Hasty</TD><TD>Kpff Consulting Engineers</TD><TD>815 S Glendora Ave</TD><TD>West Covina</TD><TD>Los Angeles</TD><TD>CA</TD><TD>91790</TD><TD>626-960-6738</TD><TD>626-960-1503</TD><TD>lashawn#hasty.com</TD><TD>http://www.lashawnhasty.com</TD><T$
<TR><TD>Marianne</TD><TD>Earman</TD><TD>Albers Technologies Corp</TD><TD>6220 S Orange Blossom Trl</TD><TD>Orlando</TD><TD>Orange</TD><TD>FL</TD><TD>32809</TD><TD>407-857-0431</TD><TD>407-857-2506</TD><TD>marianne#earman.com</TD><TD>http://www.marianneearman.com</TD$
<TR><TD>Justina</TD><TD>Dragaj</TD><TD>Uchner David D Esq</TD><TD>2552 Poplar Ave</TD><TD>Memphis</TD><TD>Shelby</TD><TD>TN</TD><TD>38112</TD><TD>901-327-5336</TD><TD>901-327-2911</TD><TD>justina#dragaj.com</TD><TD>http://www.justinadragaj.com</TD><TR>
<TR><TD>Mandy</TD><TD>Mcdonnell</TD><TD>Southern Vermont Surveys</TD><TD>343 Bush St Se</TD><TD>Salem</TD><TD>Marion</TD><TD>OR</TD><TD>97302</TD><TD>503-371-8219</TD><TD>503-371-1118</TD><TD>mandy#mcdonnell.com</TD><TD>http://www.mandymcdonnell.com</TD><TR>
<TR><TD>Conrad</TD><TD>Lanfear</TD><TD>Kahler Karen T Esq</TD><TD>49 Roche Way</TD><TD>Youngstown</TD><TD>Mahoning</TD><TD>OH</TD><TD>44512</TD><TD>330-758-0314</TD><TD>330-758-3536</TD><TD>conrad#lanfear.com</TD><TD>http://www.conradlanfear.com</TD><TR>
<TR><TD>Cyril</TD><TD>Behen</TD><TD>National Paper & Envelope Corp</TD><TD>1650 S Harbor Blvd</TD><TD>Anaheim</TD><TD>Orange</TD><TD>CA</TD><TD>92802</TD><TD>714-772-5050</TD><TD>714-772-3859</TD><TD>cyril#behen.com</TD><TD>http://www.cyrilbehen.com</TD><TR>
<TR><TD>Shelley</TD><TD>Groden</TD><TD>Norton Robert L Esq</TD><TD>110 Broadway St</TD><TD>San Antonio</TD><TD>Bexar</TD><TD>TX</TD><TD>78205</TD><TD>210-229-3017</TD><TD>210-229-9757</TD><TD>shelley#groden.com</TD><TD>http://www.shelleygroden.com</TD><TR>
</table>
</BODY>
</HTML>
So, I was messing around and I tried to create a few sed commands that would mimic the second output.
My first attempt was:
#!/bin/sh
sed -e 's=^.*$=<TR><TD>&</TD></TR>=' input.csv
Unfortunately, this program only outputs something like this where I get TR TD at the beginning and end, but no TD tags inside:
<TR><TD>"Bryan","Rovell","All N All Shop","90 Hackensack St","East Rutherford","Bergen","NJ","07073","201-939-2788","201-939-9079","bryan#rovell.com","http://www.bryanrovell.com"</TD></TR>
<TR><TD>"Joey","Bolick","Utility Trailer Sales","7700 N Council Rd","Oklahoma City","Oklahoma","OK","73132","405-728-5972","405-728-5244","joey#bolick.com","http://www.joeybolick.com"</TD></TR>
I've also attempted to create individual sed commands to tag each field, but instead I've only managed to tag each word, so I'm kinda stuck.
I'm partially on the right track, I think, but I need help indenting and adding TD to the beginning and end of every field, along with TR to the beginning and end of each new row.
This is the main part of it:
$ sed -r 's:^"?: <TR><TD>:; s:"?,"?:</TD><TD>:g; s:"?$:</TD></TR>:' file
<TR><TD>FirstName</TD><TD>LastName</TD><TD>Company</TD><TD>Address</TD><TD>City</TD><TD>County</TD><TD>State</TD><TD>ZIP</TD><TD>Phone</TD><TD>Fax</TD><TD>Email</TD><TD>Web</TD></TR>
<TR><TD>Essie</TD><TD>Vaill</TD><TD>Litronic Industries</TD><TD>14225 Hancock Dr</TD><TD>Anchorage</TD><TD>Anchorage</TD><TD>AK</TD><TD>99515</TD><TD>907-345-0962</TD><TD>907-345-1215</TD><TD>essie#vaill.com</TD><TD>http://www.essievaill.com</TD></TR>
<TR><TD>Cruz</TD><TD>Roudabush</TD><TD>Meridian Products</TD><TD>2202 S Central Ave</TD><TD>Phoenix</TD><TD>Maricopa</TD><TD>AZ</TD><TD>85004</TD><TD>602-252-4827</TD><TD>602-252-4009</TD><TD>cruz#roudabush.com</TD><TD>http://www.cruzroudabush.com</TD></TR>
<TR><TD>Billie</TD><TD>Tinnes</TD><TD>D & M Plywood Inc</TD><TD>28 W 27th St</TD><TD>New York</TD><TD>New York</TD><TD>NY</TD><TD>10001</TD><TD>212-889-5775</TD><TD>212-889-5764</TD><TD>billie#tinnes.com</TD><TD>http://www.billietinnes.com</TD></TR>
<TR><TD>Zackary</TD><TD>Mockus</TD><TD>Metropolitan Elevator Co</TD><TD>286 State St</TD><TD>Perth Amboy</TD><TD>Middlesex</TD><TD>NJ</TD><TD>08861</TD><TD>732-442-0638</TD><TD>732-442-5218</TD><TD>zackary#mockus.com</TD><TD>http://www.zackarymockus.com</TD></TR>
<TR><TD>Rosemarie</TD><TD>Fifield</TD><TD>Technology Services</TD><TD>3131 N Nimitz Hwy #-105</TD><TD>Honolulu</TD><TD>Honolulu</TD><TD>HI</TD><TD>96819</TD><TD>808-836-8966</TD><TD>808-836-6008</TD><TD>rosemarie#fifield.com</TD><TD>http://www.rosemariefifield.com</TD></TR>
<TR><TD>Bernard</TD><TD>Laboy</TD><TD>Century 21 Keewaydin Prop</TD><TD>22661 S Frontage Rd</TD><TD>Channahon</TD><TD>Will</TD><TD>IL</TD><TD>60410</TD><TD>815-467-0487</TD><TD>815-467-1244</TD><TD>bernard#laboy.com</TD><TD>http://www.bernardlaboy.com</TD></TR>
<TR><TD>Sue</TD><TD>Haakinson</TD><TD>Kim Peacock Beringhause</TD><TD>9617 N Metro Pky W</TD><TD>Phoenix</TD><TD>Maricopa</TD><TD>AZ</TD><TD>85051</TD><TD>602-953-2753</TD><TD>602-953-0355</TD><TD>sue#haakinson.com</TD><TD>http://www.suehaakinson.com</TD></TR>
<TR><TD>Valerie</TD><TD>Pou</TD><TD>Sea Port Record One Stop Inc</TD><TD>7475 Hamilton Blvd</TD><TD>Trexlertown</TD><TD>Lehigh</TD><TD>PA</TD><TD>18087</TD><TD>610-395-8743</TD><TD>610-395-6995</TD><TD>valerie#pou.com</TD><TD>http://www.valeriepou.com</TD></TR>
<TR><TD>Lashawn</TD><TD>Hasty</TD><TD>Kpff Consulting Engineers</TD><TD>815 S Glendora Ave</TD><TD>West Covina</TD><TD>Los Angeles</TD><TD>CA</TD><TD>91790</TD><TD>626-960-6738</TD><TD>626-960-1503</TD><TD>lashawn#hasty.com</TD><TD>http://www.lashawnhasty.com</TD></TR>
<TR><TD>Marianne</TD><TD>Earman</TD><TD>Albers Technologies Corp</TD><TD>6220 S Orange Blossom Trl</TD><TD>Orlando</TD><TD>Orange</TD><TD>FL</TD><TD>32809</TD><TD>407-857-0431</TD><TD>407-857-2506</TD><TD>marianne#earman.com</TD><TD>http://www.marianneearman.com</TD></TR>
<TR><TD>Justina</TD><TD>Dragaj</TD><TD>Uchner</TD><TD> David D Esq</TD><TD>2552 Poplar Ave</TD><TD>Memphis</TD><TD>Shelby</TD><TD>TN</TD><TD>38112</TD><TD>901-327-5336</TD><TD>901-327-2911</TD><TD>justina#dragaj.com</TD><TD>http://www.justinadragaj.com</TD></TR>
<TR><TD>Mandy</TD><TD>Mcdonnell</TD><TD>Southern Vermont Surveys</TD><TD>343 Bush St Se</TD><TD>Salem</TD><TD>Marion</TD><TD>OR</TD><TD>97302</TD><TD>503-371-8219</TD><TD>503-371-1118</TD><TD>mandy#mcdonnell.com</TD><TD>http://www.mandymcdonnell.com</TD></TR>
<TR><TD>Conrad</TD><TD>Lanfear</TD><TD>Kahler</TD><TD> Karen T Esq</TD><TD>49 Roche Way</TD><TD>Youngstown</TD><TD>Mahoning</TD><TD>OH</TD><TD>44512</TD><TD>330-758-0314</TD><TD>330-758-3536</TD><TD>conrad#lanfear.com</TD><TD>http://www.conradlanfear.com</TD></TR>
<TR><TD>Cyril</TD><TD>Behen</TD><TD>National Paper & Envelope Corp</TD><TD>1650 S Harbor Blvd</TD><TD>Anaheim</TD><TD>Orange</TD><TD>CA</TD><TD>92802</TD><TD>714-772-5050</TD><TD>714-772-3859</TD><TD>cyril#behen.com</TD><TD>http://www.cyrilbehen.com</TD></TR>
<TR><TD>Shelley</TD><TD>Groden</TD><TD>Norton</TD><TD> Robert L Esq</TD><TD>110 Broadway St</TD><TD>San Antonio</TD><TD>Bexar</TD><TD>TX</TD><TD>78205</TD><TD>210-229-3017</TD><TD>210-229-9757</TD><TD>shelley#groden.com</TD><TD>http://www.shelleygroden.com</TD></TR>
I expect you can figure out the rest since that's just printing the head and tail lines.