GraphQL API and utilities for the rpdata project
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

127 lines
2.9 KiB

  1. package parsers
  2. import (
  3. "bufio"
  4. "fmt"
  5. "git.aiterp.net/rpdata/api/models"
  6. "strings"
  7. "time"
  8. )
  9. // A ParsedLog contains the parsed log header and its posts.
  10. type ParsedLog struct {
  11. Log models.Log
  12. Posts []*models.Post
  13. }
  14. // ForumLog parses the logs from the data.
  15. func ForumLog(data string, tz *time.Location) ([]ParsedLog, error) {
  16. metadata := ForumLogMetadata(data)
  17. results := make([]ParsedLog, 0, len(metadata["Date"]))
  18. scanner := bufio.NewScanner(strings.NewReader(data))
  19. for i, dateStr := range metadata["Date"] {
  20. // Parse date
  21. date, err := time.ParseInLocation("January 2, 2006", dateStr, tz)
  22. if err != nil {
  23. return nil, fmt.Errorf("failed to parse date #%d: %#+v is not the in the correct format of \"January 2, 2006\"", i+1, dateStr)
  24. }
  25. // Parse posts
  26. posts := make([]*models.Post, 0, 128)
  27. parsing := false
  28. prev := ""
  29. prevPost := models.Post{}
  30. for scanner.Scan() {
  31. line := strings.Trim(scanner.Text(), "\r\t  ")
  32. // Skip lines with links to other logs using the --> and <-- notation.
  33. if strings.HasPrefix(line, "-") || strings.HasPrefix(line, "<") {
  34. prev = line
  35. continue
  36. }
  37. // If parsing and reaching a double empty-line, the session is done.
  38. if parsing && len(line) < 2 && len(prev) < 2 {
  39. break
  40. }
  41. // Skip empty lines, but record them as thep revious for the above check.
  42. if len(line) < 2 {
  43. prev = line
  44. continue
  45. }
  46. // If not parsing, skip until the first mirclike post.
  47. if !parsing {
  48. if strings.HasPrefix(line, "[") {
  49. parsing = true
  50. } else {
  51. prev = line
  52. continue
  53. }
  54. }
  55. // Parse the post.
  56. post, err := MircPost(line, date, prevPost)
  57. if err != nil {
  58. summary := ""
  59. for _, ru := range line {
  60. summary += string(ru)
  61. if len(summary) > 60 {
  62. summary += "..."
  63. break
  64. }
  65. }
  66. return nil, fmt.Errorf("failed to parse post: %s", summary)
  67. }
  68. posts = append(posts, &post)
  69. prevPost = post
  70. prev = line
  71. }
  72. // No posts means there's a problem.
  73. if len(posts) == 0 {
  74. return nil, fmt.Errorf("session %d (%s) has no posts (too many dates?)", i+1, dateStr)
  75. }
  76. // Add it.
  77. results = append(results, ParsedLog{
  78. Log: models.Log{Date: posts[0].Time},
  79. Posts: posts,
  80. })
  81. }
  82. return results, nil
  83. }
  84. // ForumLogMetadata parses metadata, discards the broken parts, and returns the
  85. // parsed data as a map (`m`) and the position of the first IRC post (`n`)
  86. func ForumLogMetadata(data string) map[string][]string {
  87. result := make(map[string][]string)
  88. key := ""
  89. scanner := bufio.NewScanner(strings.NewReader(data))
  90. for scanner.Scan() {
  91. line := strings.Trim(scanner.Text(), "\r\t  ")
  92. if strings.HasPrefix(line, "[") {
  93. break
  94. }
  95. if len(line) < 1 {
  96. key = ""
  97. continue
  98. }
  99. if key == "" {
  100. split := strings.Split(line, ":")
  101. key = split[0]
  102. } else {
  103. result[key] = append(result[key], line)
  104. }
  105. }
  106. return result
  107. }